Commit 744908f0 authored by: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into ocr_ctc

@@ -24,11 +24,22 @@ unittest(){
trap 'abort' 0
set -e
for proj in */ ; do
for proj in * ; do
if [ -d $proj ]; then
unittest $proj
if [ $? != 0 ]; then
exit 1
if [ "$proj" = "fluid" ]; then
for proj in fluid/* ; do
if [ -d $proj ]; then
unittest $proj
if [ $? != 0 ]; then
exit 1
fi
fi
done
else
unittest $proj
if [ $? != 0 ]; then
exit 1
fi
fi
fi
done
16.2845556399 11.6891798673
17.21509949 12.3788567902
18.1143704548 14.9912618017
19.2335963752 18.5419556172
19.9266772451 21.2768220522
19.8245737202 21.2347210705
19.5432940972 20.2784036567
19.4631271754 20.2934452329
19.3929919324 20.457971868
19.2924788362 20.3626439234
18.9207244502 19.9196569759
18.7202605641 19.5920276899
18.4844279398 19.2068349019
18.2670948624 18.8716893824
18.0929628855 18.5439666541
17.8428896026 18.0255891747
17.6646850635 17.473764296
17.4955705896 16.8966859471
17.3706720293 16.4294027467
17.2530867792 16.0514717623
17.1304341172 15.7234699057
17.0038353287 15.4344471514
16.902550309 15.1603287337
16.8375590047 14.9304337826
16.816287853 14.9119310513
16.828838265 15.0930023024
16.8602209498 15.3771992423
16.9101763812 15.6897991789
16.9466065143 15.9364556489
16.9486061956 16.0699417826
16.9041374104 16.0796970272
16.8410093699 16.0111444599
16.7045718836 15.7991985601
16.51128489 15.5208920129
16.3253910608 15.2603181921
16.1297317333 14.9499965958
15.903428372 14.5958280409
15.6131718105 14.2709618
15.1395035533 13.9993939893
14.4298229999 13.3841189151
0.0034970565424 0.246184766149
0.00501284154705 0.238484972472
0.00605942680019 0.269064381708
0.00687266156243 0.319479238011
0.00734065019253 0.371947383205
0.00718807218417 0.384426479694
0.00652195540212 0.384676838281
0.00660416525951 0.395543910317
0.00680202057642 0.400803979681
0.00659144183007 0.393228973031
0.00605294530423 0.385021118038
0.00590452969394 0.361763039625
0.00612315374687 0.346777773373
0.00582354093973 0.335802403976
0.00574556002554 0.320733728218
0.00612254485891 0.310153103033
0.00626733043219 0.299854747445
0.00567398408041 0.293353685493
0.00519236700706 0.287668810947
0.00529581474367 0.281479660772
0.00479019484082 0.27451415777
0.00486381039428 0.266294391154
0.00491126372868 0.258105116126
0.00452105305011 0.252926328298
0.00531483334271 0.250910887373
0.00546572110469 0.253302256977
0.00479544857908 0.258484183394
0.00422106426297 0.264582900173
0.00401824135188 0.268467945623
0.0041705465252 0.269699480291
0.00405239564143 0.270406162975
0.0040059737566 0.270407601782
0.00406426729317 0.267951582656
0.00416613791013 0.264543833042
0.00427847607653 0.26247798891
0.00428050903034 0.259635263243
0.00454842971786 0.255829377617
0.00393747552387 0.253802307025
0.00374143688909 0.251011478787
0.00335475310258 0.236543650856
0.000373194755312 0.0419494800709
0.000230909648678 0.0394102370205
0.000150840015851 0.0414956922398
8.44401840771e-05 0.0460502231327
-6.24759314572e-06 0.0528049937739
-8.82957758148e-05 0.055711244886
1.16795791952e-05 0.0563188428833
-1.68716267856e-05 0.0575232763711
-0.000112625308645 0.057979929947
-0.000122619090002 0.0564126233493
1.73569637319e-05 0.05522573909
6.49872782342e-05 0.0507353361334
4.17746389178e-05 0.0479568131253
5.13884475653e-05 0.0461253238047
1.8860115143e-05 0.0436860476919
-5.64317701105e-05 0.042516381059
-0.000136859948115 0.0413574820205
-7.00847019726e-05 0.0409516370727
-5.39392223336e-05 0.040441504085
-9.24897162815e-05 0.0397800398173
4.7104970622e-05 0.039046286243
6.24805896165e-06 0.0380185986602
-2.35272813418e-05 0.036851063786
5.88344154127e-05 0.0361640489242
-8.39162076993e-05 0.0357639427311
-0.000108702805776 0.0358774639538
3.22013961834e-06 0.0363644530435
9.43501518394e-05 0.0370309934774
0.000134406229423 0.0374972993343
3.84007008533e-05 0.037676222515
3.05989328157e-05 0.0379111939182
9.52201629091e-05 0.0380927209106
0.000102126083729 0.0379925358499
6.98628072264e-05 0.0377276252241
4.55782256339e-05 0.0375165468654
4.76370987786e-05 0.0371482526345
-2.24128832709e-05 0.0366810742947
0.000125621306953 0.036628355271
0.000134568666093 0.0364860461759
0.000159858844464 0.0345583593149
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
import numpy as np
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
class TestTransMeanVarianceNorm(unittest.TestCase):
"""unit test for TransMeanVarianceNorm
"""
def setUp(self):
self._file_path = "./data_utils/augmentor/tests/data/" \
"global_mean_var_search26kHr"
def test(self):
feature = np.zeros((2, 120), dtype="float32")
feature.fill(1)
trans = trans_mean_variance_norm.TransMeanVarianceNorm(self._file_path)
(feature1, label1) = trans.perform_trans((feature, None))
(mean, var) = trans.get_mean_var()
feature_flat1 = feature1.flatten()
feature_flat = feature.flatten()
one = np.ones((1), dtype="float32")
for idx, val in enumerate(feature_flat1):
cur_idx = idx % 120
self.assertAlmostEqual(val, (one[0] - mean[cur_idx]) * var[cur_idx])
class TestTransAddDelta(unittest.TestCase):
"""unit test TestTransAddDelta
"""
def test_regress(self):
"""test regress
"""
feature = np.zeros((14, 120), dtype="float32")
feature[0:5, 0:40].fill(1)
feature[0 + 5, 0:40].fill(1)
feature[1 + 5, 0:40].fill(2)
feature[2 + 5, 0:40].fill(3)
feature[3 + 5, 0:40].fill(4)
feature[8:14, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
feature = feature.reshape((14 * 120))
trans._regress(feature, 5 * 120, feature, 5 * 120 + 40, 40, 4, 120)
trans._regress(feature, 5 * 120 + 40, feature, 5 * 120 + 80, 40, 4, 120)
feature = feature.reshape((14, 120))
tmp_feature = feature[5:5 + 4, :]
self.assertAlmostEqual(1.0, tmp_feature[0][0])
self.assertAlmostEqual(0.24, tmp_feature[0][119])
self.assertAlmostEqual(2.0, tmp_feature[1][0])
self.assertAlmostEqual(0.13, tmp_feature[1][119])
self.assertAlmostEqual(3.0, tmp_feature[2][0])
self.assertAlmostEqual(-0.13, tmp_feature[2][119])
self.assertAlmostEqual(4.0, tmp_feature[3][0])
self.assertAlmostEqual(-0.24, tmp_feature[3][119])
def test_perform(self):
"""test perform
"""
feature = np.zeros((4, 40), dtype="float32")
feature[0, 0:40].fill(1)
feature[1, 0:40].fill(2)
feature[2, 0:40].fill(3)
feature[3, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
(feature, label) = trans.perform_trans((feature, None))
self.assertAlmostEqual(feature.shape[0], 4)
self.assertAlmostEqual(feature.shape[1], 120)
self.assertAlmostEqual(1.0, feature[0][0])
self.assertAlmostEqual(0.24, feature[0][119])
self.assertAlmostEqual(2.0, feature[1][0])
self.assertAlmostEqual(0.13, feature[1][119])
self.assertAlmostEqual(3.0, feature[2][0])
self.assertAlmostEqual(-0.13, feature[2][119])
self.assertAlmostEqual(4.0, feature[3][0])
self.assertAlmostEqual(-0.24, feature[3][119])
class TestTransSplice(unittest.TestCase):
"""unit test for TransSplice
"""
def test_perform(self):
feature = np.zeros((8, 10), dtype="float32")
for i in xrange(feature.shape[0]):
feature[i, :].fill(i)
trans = trans_splice.TransSplice()
(feature, label) = trans.perform_trans((feature, None))
self.assertEqual(feature.shape[1], 110)
for i in xrange(8):
nzero_num = 5 - i
cur_val = 0.0
if nzero_num < 0:
cur_val = i - 5 - 1
for j in xrange(11):
if j <= nzero_num:
for k in xrange(10):
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
else:
if cur_val < 7:
cur_val += 1.0
for k in xrange(10):
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
if __name__ == '__main__':
unittest.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
import copy
class TransAddDelta(object):
""" add delta of feature data
trans feature for shape(a, b) to shape(a, b * 3)
Attributes:
_norder(int):
_window(int):
"""
def __init__(self, norder=2, nwindow=2):
""" init construction
Args:
norder: default 2
nwindow: default 2
"""
self._norder = norder
self._nwindow = nwindow
def perform_trans(self, sample):
""" add delta for feature
transform feature shape from (a, b) to (a, b * 3)
Args:
sample(object,tuple): contain feature numpy and label numpy
Returns:
(feature, label)
"""
(feature, label) = sample
frame_dim = feature.shape[1]
d_frame_dim = frame_dim * 3
head_filled = 5
tail_filled = 5
mat = np.zeros(
(feature.shape[0] + head_filled + tail_filled, d_frame_dim),
dtype="float32")
#copy first frame
for i in xrange(head_filled):
np.copyto(mat[i, 0:frame_dim], feature[0, :])
np.copyto(mat[head_filled:head_filled + feature.shape[0], 0:frame_dim],
feature[:, :])
# copy last frame
for i in xrange(head_filled + feature.shape[0], mat.shape[0], 1):
np.copyto(mat[i, 0:frame_dim], feature[feature.shape[0] - 1, :])
nframe = feature.shape[0]
start = head_filled
tmp_shape = mat.shape
mat = mat.reshape((tmp_shape[0] * tmp_shape[1]))
self._regress(mat, start * d_frame_dim, mat,
start * d_frame_dim + frame_dim, frame_dim, nframe,
d_frame_dim)
self._regress(mat, start * d_frame_dim + frame_dim, mat,
start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
d_frame_dim)
mat.shape = tmp_shape
return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
""" regress
Args:
data_in: in data
start_in: start index of data_in
data_out: out data
start_out: start index of data_out
size: frame dimension
n: frame num
step: 3 * (frame dim), the stride between consecutive frames in the flattened buffer
Returns:
None
"""
sigma_t2 = 0.0
delta_window = self._nwindow
for t in xrange(1, delta_window + 1):
sigma_t2 += t * t
sigma_t2 *= 2.0
for i in xrange(n):
fp1 = start_in
fp2 = start_out
for j in xrange(size):
back = fp1
forw = fp1
sum = 0.0
for t in xrange(1, delta_window + 1):
back -= step
forw += step
sum += t * (data_in[forw] - data_in[back])
data_out[fp2] = sum / sigma_t2
fp1 += 1
fp2 += 1
start_in += step
start_out += step
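The _regress routine above implements the usual delta-feature regression: for window size W, each output value is sum over t=1..W of t * (x[i+t] - x[i-t]), divided by 2 * sum of t^2. A minimal self-contained NumPy sketch of the same formula (the helper name is hypothetical; the edge padding mirrors what perform_trans does with head_filled/tail_filled):

import numpy as np

def delta(feat, window=2):
    """First-order deltas of a (frames, dims) array; edges are padded by
    repeating the first/last frame, as TransAddDelta.perform_trans does."""
    denom = 2.0 * sum(t * t for t in range(1, window + 1))
    padded = np.pad(feat, ((window, window), (0, 0)), mode="edge")
    out = np.zeros_like(feat)
    for t in range(1, window + 1):
        out += t * (padded[window + t:window + t + len(feat)] -
                    padded[window - t:window - t + len(feat)])
    return out / denom

x = np.ones((4, 3), dtype="float32")
print(delta(x))  # constant features -> all-zero deltas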
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
class TransMeanVarianceNorm(object):
""" normalization of mean variance for feature data
Attributes:
_mean(numpy.array): the feature mean vector
_var(numpy.array): the feature variance
"""
def __init__(self, snorm_path):
"""init construction
Args:
snorm_path: the path of mean and variance
"""
self._mean = None
self._var = None
self._load_norm(snorm_path)
def _load_norm(self, snorm_path):
""" load mean var file
Args:
snorm_path(str):the file path
"""
lLines = open(snorm_path).readlines()
nLen = len(lLines)
self._mean = np.zeros((nLen), dtype="float32")
self._var = np.zeros((nLen), dtype="float32")
self._nLen = nLen
for nidx, l in enumerate(lLines):
s = l.split()
assert len(s) == 2
self._mean[nidx] = float(s[0])
self._var[nidx] = 1.0 / math.sqrt(float(s[1]))
if self._var[nidx] > 100000.0:
self._var[nidx] = 100000.0
def get_mean_var(self):
""" get mean and var
Args:
Returns:
(mean, var)
"""
return (self._mean, self._var)
def perform_trans(self, sample):
""" feature = (feature - mean) * var
Args:
sample(object):input sample, contain feature numpy and label numpy
Returns:
(feature, label)
"""
(feature, label) = sample
shape = feature.shape
assert len(shape) == 2
nfeature_len = shape[0] * shape[1]
assert nfeature_len % self._nLen == 0
ncur_idx = 0
feature = feature.reshape((nfeature_len))
while ncur_idx < nfeature_len:
block = feature[ncur_idx:ncur_idx + self._nLen]
block = (block - self._mean) * self._var
feature[ncur_idx:ncur_idx + self._nLen] = block
ncur_idx += self._nLen
feature = feature.reshape(shape)
return (feature, label)
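A quick usage sketch of the transform above, assuming it is run from the DeepASR project root so that the data_utils package and the bundled mean/var file (one "mean variance" pair per feature dimension, as _load_norm expects) are available:

import numpy as np
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm

norm = trans_mean_variance_norm.TransMeanVarianceNorm(
    "./data_utils/augmentor/tests/data/global_mean_var_search26kHr")
feature = np.ones((2, 120), dtype="float32")
feature, label = norm.perform_trans((feature, None))
# Every value is now (1.0 - mean[d]) / sqrt(var[d]) for its dimension d,
# which is exactly what TestTransMeanVarianceNorm above asserts.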
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
class TransSplice(object):
""" copy feature context to construct new feature
expand feature data from shape (frame_num, frame_dim)
to shape (frame_num, frame_dim * (nleft_context + nright_context + 1)),
i.e. (frame_num, frame_dim * 11) with the default contexts
Attributes:
_nleft_context(int): copy left context number
_nright_context(int): copy right context number
"""
def __init__(self, nleft_context=5, nright_context=5):
""" init construction
Args:
nleft_context(int): number of left context frames to copy, default 5
nright_context(int): number of right context frames to copy, default 5
"""
self._nleft_context = nleft_context
self._nright_context = nright_context
def perform_trans(self, sample):
""" copy feature context
Args:
sample(object): input sample(feature, label)
Return:
(feature, label)
"""
(feature, label) = sample
nframe_num = feature.shape[0]
nframe_dim = feature.shape[1]
nnew_frame_dim = nframe_dim * (
self._nleft_context + self._nright_context + 1)
mat = np.zeros(
(nframe_num + self._nleft_context + self._nright_context,
nframe_dim),
dtype="float32")
ret = np.zeros((nframe_num, nnew_frame_dim), dtype="float32")
#copy left
for i in xrange(self._nleft_context):
mat[i, :] = feature[0, :]
#copy middle
mat[self._nleft_context:self._nleft_context +
nframe_num, :] = feature[:, :]
#copy right
for i in xrange(self._nright_context):
mat[i + self._nleft_context + nframe_num, :] = feature[-1, :]
mat = mat.reshape(mat.shape[0] * mat.shape[1])
ret = ret.reshape(ret.shape[0] * ret.shape[1])
for i in xrange(nframe_num):
np.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
ret = ret.reshape((nframe_num, nnew_frame_dim))
return (ret, label)
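A small usage sketch of the splicing transform (self-contained apart from the data_utils import; the defaults give 5 frames of left context and 5 of right context):

import numpy as np
import data_utils.augmentor.trans_splice as trans_splice

feature = np.arange(8 * 10, dtype="float32").reshape((8, 10))
splice = trans_splice.TransSplice(nleft_context=5, nright_context=5)
spliced, _ = splice.perform_trans((feature, None))
print(spliced.shape)  # (8, 110): each output frame concatenates 11 input frames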
"""This module contains data processing related logic.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import struct
import Queue
import time
import numpy as np
from threading import Thread
import signal
from multiprocessing import Manager, Process
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
from data_utils.util import suppress_complaints, suppress_signal
class SampleInfo(object):
"""SampleInfo holds the necessary information to load a sample from disk.
Args:
feature_bin_path (str): File containing the feature data.
feature_start (int): Start position of the sample's feature data.
feature_size (int): Byte count of the sample's feature data.
feature_frame_num (int): Time length of the sample.
feature_dim (int): Feature dimension of one frame.
label_bin_path (str): File containing the label data.
label_start (int): Start position of the sample's label data.
label_size (int): Byte count of the sample's label data.
label_frame_num (int): Label number of the sample.
"""
def __init__(self, feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path, label_start,
label_size, label_frame_num):
self.feature_bin_path = feature_bin_path
self.feature_start = feature_start
self.feature_size = feature_size
self.feature_frame_num = feature_frame_num
self.feature_dim = feature_dim
self.label_bin_path = label_bin_path
self.label_start = label_start
self.label_size = label_size
self.label_frame_num = label_frame_num
class SampleInfoBucket(object):
"""SampleInfoBucket contains paths of several description files. Feature
description file contains necessary information (including path of binary
data, sample start position, sample byte number etc.) to access samples'
feature data, and likewise for the label description file. SampleInfoBucket
is the minimum unit to do shuffle.
Args:
feature_bin_paths (list|tuple): Files containing the binary feature
data.
feature_desc_paths (list|tuple): Files containing the description of
samples' feature data.
label_bin_paths (list|tuple): Files containing the binary label data.
label_desc_paths (list|tuple): Files containing the description of
samples' label data.
"""
def __init__(self, feature_bin_paths, feature_desc_paths, label_bin_paths,
label_desc_paths):
block_num = len(label_bin_paths)
assert len(label_desc_paths) == block_num
assert len(feature_bin_paths) == block_num
assert len(feature_desc_paths) == block_num
self._block_num = block_num
self._feature_bin_paths = feature_bin_paths
self._feature_desc_paths = feature_desc_paths
self._label_bin_paths = label_bin_paths
self._label_desc_paths = label_desc_paths
def generate_sample_info_list(self):
sample_info_list = []
for block_idx in xrange(self._block_num):
label_bin_path = self._label_bin_paths[block_idx]
label_desc_path = self._label_desc_paths[block_idx]
feature_bin_path = self._feature_bin_paths[block_idx]
feature_desc_path = self._feature_desc_paths[block_idx]
label_desc_lines = open(label_desc_path).readlines()
feature_desc_lines = open(feature_desc_path).readlines()
sample_num = int(label_desc_lines[0].split()[1])
assert sample_num == int(feature_desc_lines[0].split()[1])
for i in xrange(sample_num):
feature_desc_split = feature_desc_lines[i + 1].split()
feature_start = int(feature_desc_split[2])
feature_size = int(feature_desc_split[3])
feature_frame_num = int(feature_desc_split[4])
feature_dim = int(feature_desc_split[5])
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
sample_info_list.append(
SampleInfo(feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path,
label_start, label_size, label_frame_num))
return sample_info_list
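For reference, generate_sample_info_list only consumes a few whitespace-separated columns of each description file: the second field of the first line is the sample count, and fields 2-5 of every following line give start offset, byte size, frame number and (for features) dimension. A hedged sketch of parsing one such line (the concrete values and the ignored leading fields are made up for illustration):

# Hypothetical feature description line; only fields 2-5 are read above.
feature_desc_line = "0 utt_000 0 412800 860 120"
fields = feature_desc_line.split()
feature_start, feature_size = int(fields[2]), int(fields[3])
feature_frame_num, feature_dim = int(fields[4]), int(fields[5])
# float32 features: byte size = frames * dims * 4 (checked later in DataReader)
assert feature_size == feature_frame_num * feature_dim * 4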
class EpochEndSignal():
pass
class DataReader(object):
"""DataReader provides basic audio sample preprocessing pipeline including
data loading and data augmentation.
Args:
feature_file_list (str): File containing paths of feature data file and
corresponding description file.
label_file_list (str): File containing paths of label data file and
corresponding description file.
drop_frame_len (int): Samples whose frame number exceeds this value will
be dropped.
process_num (int): Number of processes for processing data.
sample_buffer_size (int): Buffer size to indicate the maximum samples
cached.
sample_info_buffer_size (int): Buffer size to indicate the maximum
sample information cached.
batch_buffer_size (int): Buffer size to indicate the maximum batch
cached.
shuffle_block_num (int): Number of blocks grouped into the minimum unit
for shuffling.
random_seed (int): Random seed.
verbose (int): If set to 0, complaints including exceptions and signal
traceback from sub-process will be suppressed. If set
to 1, all complaints will be printed.
"""
def __init__(self,
feature_file_list,
label_file_list,
drop_frame_len=512,
process_num=10,
sample_buffer_size=1024,
sample_info_buffer_size=1024,
batch_buffer_size=1024,
shuffle_block_num=1,
random_seed=0,
verbose=0):
self._feature_file_list = feature_file_list
self._label_file_list = label_file_list
self._drop_frame_len = drop_frame_len
self._shuffle_block_num = shuffle_block_num
self._block_info_list = None
self._rng = random.Random(random_seed)
self._bucket_list = None
self.generate_bucket_list(True)
self._order_id = 0
self._manager = Manager()
self._sample_buffer_size = sample_buffer_size
self._sample_info_buffer_size = sample_info_buffer_size
self._batch_buffer_size = batch_buffer_size
self._process_num = process_num
self._verbose = verbose
def generate_bucket_list(self, is_shuffle):
if self._block_info_list is None:
block_feature_info_lines = open(self._feature_file_list).readlines()
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(block_label_info_lines)
self._block_info_list = []
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if is_shuffle:
self._rng.shuffle(self._block_info_list)
self._bucket_list = []
for i in xrange(0, len(self._block_info_list), self._shuffle_block_num):
bucket_block_info = self._block_info_list[i:i +
self._shuffle_block_num]
self._bucket_list.append(
SampleInfoBucket(
map(lambda info: info[0], bucket_block_info),
map(lambda info: info[1], bucket_block_info),
map(lambda info: info[2], bucket_block_info),
map(lambda info: info[3], bucket_block_info)))
# @TODO make this configurable
def set_transformers(self, transformers):
self._transformers = transformers
def _sample_generator(self):
sample_info_queue = self._manager.Queue(self._sample_info_buffer_size)
sample_queue = self._manager.Queue(self._sample_buffer_size)
self._order_id = 0
@suppress_complaints(verbose=self._verbose)
def ordered_feeding_task(sample_info_queue):
for sample_info_bucket in self._bucket_list:
sample_info_list = sample_info_bucket.generate_sample_info_list(
)
self._rng.shuffle(sample_info_list) # do shuffle here
for sample_info in sample_info_list:
sample_info_queue.put((sample_info, self._order_id))
self._order_id += 1
for i in xrange(self._process_num):
sample_info_queue.put(EpochEndSignal())
feeding_thread = Thread(
target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_thread.daemon = True
feeding_thread.start()
@suppress_complaints(verbose=self._verbose)
def ordered_processing_task(sample_info_queue, sample_queue, out_order):
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
def read_bytes(fpath, start, size):
f = open(fpath, 'rb')
f.seek(start, 0)
binary_bytes = f.read(size)
f.close()
return binary_bytes
ins = sample_info_queue.get()
while not isinstance(ins, EpochEndSignal):
sample_info, order_id = ins
feature_bytes = read_bytes(sample_info.feature_bin_path,
sample_info.feature_start,
sample_info.feature_size)
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(label_bytes)
label_array = struct.unpack('I' * sample_info.label_frame_num,
label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
feature_frame_num = sample_info.feature_frame_num
feature_dim = sample_info.feature_dim
assert feature_frame_num * feature_dim * 4 == len(feature_bytes)
feature_array = struct.unpack('f' * feature_frame_num *
feature_dim, feature_bytes)
feature_data = np.array(
feature_array, dtype='float32').reshape((
sample_info.feature_frame_num, sample_info.feature_dim))
sample_data = (feature_data, label_data)
for transformer in self._transformers:
# @TODO(pkuyym) to make transformer only accept feature_data
sample_data = transformer.perform_trans(sample_data)
while order_id != out_order[0]:
time.sleep(0.001)
# drop long sentence
if self._drop_frame_len >= sample_data[0].shape[0]:
sample_queue.put(sample_data)
out_order[0] += 1
ins = sample_info_queue.get()
sample_queue.put(EpochEndSignal())
out_order = self._manager.list([0])
args = (sample_info_queue, sample_queue, out_order)
workers = [
Process(
target=ordered_processing_task, args=args)
for _ in xrange(self._process_num)
]
for w in workers:
w.daemon = True
w.start()
finished_process_num = 0
while finished_process_num < self._process_num:
sample = sample_queue.get()
if isinstance(sample, EpochEndSignal):
finished_process_num += 1
continue
yield sample
feeding_thread.join()
for w in workers:
w.join()
def batch_iterator(self, batch_size, minimum_batch_size):
def batch_to_ndarray(batch_samples, lod):
assert len(batch_samples)
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
return (batch_feature, batch_label)
@suppress_complaints(verbose=self._verbose)
def batch_assembling_task(sample_generator, batch_queue):
batch_samples = []
lod = [0]
for sample in sample_generator():
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
(batch_feature, batch_label) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod))
batch_samples = []
lod = [0]
if len(batch_samples) >= minimum_batch_size:
(batch_feature, batch_label) = batch_to_ndarray(batch_samples,
lod)
batch_queue.put((batch_feature, batch_label, lod))
batch_queue.put(EpochEndSignal())
batch_queue = Queue.Queue(self._batch_buffer_size)
assembling_thread = Thread(
target=batch_assembling_task,
args=(self._sample_generator, batch_queue))
assembling_thread.daemon = True
assembling_thread.start()
while True:
try:
batch_data = batch_queue.get_nowait()
except Queue.Empty:
time.sleep(0.001)
else:
if isinstance(batch_data, EpochEndSignal):
break
yield batch_data
assembling_thread.join()
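A usage sketch tying the pieces together (the list and mean/var paths follow the defaults used in profile.py and train.py below and are assumed to exist; run from the project root):

import data_utils.data_reader as reader
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_splice as trans_splice

data_reader = reader.DataReader('data/feature.lst', 'data/label.lst')
data_reader.set_transformers([
    trans_add_delta.TransAddDelta(2, 2),
    trans_mean_variance_norm.TransMeanVarianceNorm(
        'data/global_mean_var_search26kHr'),
    trans_splice.TransSplice(),
])
for batch_feature, batch_label, lod in data_reader.batch_iterator(
        batch_size=32, minimum_batch_size=1):
    # batch_feature: (total_frames, 120 * 11) float32
    # batch_label:   (total_frames, 1) int64
    # lod: cumulative frame counts marking sample boundaries in the batch
    pass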
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
from six import reraise
from tblib import Traceback
import numpy as np
import paddle.v2.fluid as fluid
def to_lodtensor(data, place):
"""convert tensor to lodtensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def lodtensor_to_ndarray(lod_tensor):
"""conver lodtensor to ndarray
"""
dims = lod_tensor.get_dims()
ret = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)):
ret.ravel()[i] = lod_tensor.get_float_element(i)
return ret, lod_tensor.lod()
def suppress_signal(signo, stack_frame):
pass
def suppress_complaints(verbose):
def decorator_maker(func):
def suppress_warpper(*args, **kwargs):
try:
func(*args, **kwargs)
except:
et, ev, tb = sys.exc_info()
tb = Traceback(tb)
if verbose == 1:
reraise(et, ev, tb.as_traceback())
return suppress_warpper
return decorator_maker
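A short sketch of how the decorator above behaves (the worker function is hypothetical): with verbose=0 any exception raised inside the wrapped function is swallowed, while with verbose=1 it is re-raised through tblib so the original traceback survives across process boundaries.

from data_utils.util import suppress_complaints

@suppress_complaints(verbose=0)
def worker():
    raise RuntimeError("never propagates when verbose == 0")

worker()  # returns None silently; with verbose=1 the exception is re-raised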
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstmp_model(hidden_dim,
proj_dim,
stacked_num,
class_num,
parallel=False,
is_train=True):
""" The model for DeepASR. The main structure is composed of stacked
identical LSTMP (LSTM with recurrent projection) layers.
In the training and validation phases, the feeding dictionary
is {'feature', 'label'}, fed by LoDTensors for the feature data and
label data respectively. In inference, only `feature` is needed.
Args:
hidden_dim(int): The hidden state's dimension of the LSTMP layer.
proj_dim(int): The projection size of the LSTMP layer.
stacked_num(int): The number of stacked LSTMP layers.
parallel(bool): Run in parallel or not, default `False`.
is_train(bool): Run in training phase or not, default `True`.
class_num(int): The number of output classes.
"""
# network configuration
def _net_conf(feature, label):
seq_conv1 = fluid.layers.sequence_conv(
input=feature,
num_filters=1024,
filter_size=3,
filter_stride=1,
bias_attr=True)
bn1 = fluid.layers.batch_norm(
input=seq_conv1,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn1
for i in range(stacked_num):
fc = fluid.layers.fc(input=stack_input,
size=hidden_dim * 4,
bias_attr=True)
proj, cell = fluid.layers.dynamic_lstmp(
input=fc,
size=hidden_dim * 4,
proj_size=proj_dim,
bias_attr=True,
use_peepholes=True,
is_reverse=False,
cell_activation="tanh",
proj_activation="tanh")
bn = fluid.layers.batch_norm(
input=proj,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn
prediction = fluid.layers.fc(input=stack_input,
size=class_num,
act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return prediction, avg_cost, acc
# data feeder
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
if parallel:
# When the execution place is specified to CUDAPlace, the program will
# run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
# run on all CPU devices.
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
feat_ = pd.read_input(feature)
label_ = pd.read_input(label)
prediction, avg_cost, acc = _net_conf(feat_, label_)
for out in [avg_cost, acc]:
pd.write_output(out)
# get mean loss and acc over all devices.
avg_cost, acc = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
acc = fluid.layers.mean(x=acc)
else:
prediction, avg_cost, acc = _net_conf(feature, label)
return prediction, avg_cost, acc
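A minimal sketch of building and optimizing the network (hyper-parameter values follow the defaults of the training/profiling scripts below; feeding happens later through the "feature" and "label" LoDTensors):

import paddle.v2.fluid as fluid
from model_utils.model import stacked_lstmp_model

prediction, avg_cost, acc = stacked_lstmp_model(
    hidden_dim=1024, proj_dim=512, stacked_num=5, class_num=1749,
    parallel=False, is_train=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.002)
optimizer.minimize(avg_cost)
# exe.run(...) then feeds {'feature': ..., 'label': ...}, as in train.py below.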
"""Add the parent directory to $PYTHONPATH"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = os.path.dirname(__file__)
# Add project path to PYTHONPATH
proj_path = os.path.join(this_dir, '..')
add_path(proj_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import _init_paths
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from model_utils.model import stacked_lstmp_model
from data_utils.util import lodtensor_to_ndarray
def parse_args():
parser = argparse.ArgumentParser("Profiling for the stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The number of sequences in a batch. (default: %(default)d)')
parser.add_argument(
'--minimum_batch_size',
type=int,
default=1,
help='The minimum number of sequences in a batch. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstmp layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Projection size of the lstmp unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--feature_lst',
type=str,
default='data/feature.lst',
help='feature list path.')
parser.add_argument(
'--label_lst',
type=str,
default='data/label.lst',
help='label list path.')
parser.add_argument(
'--max_batch_num',
type=int,
default=10,
help='Maximum number of batches for profiling. (default: %(default)d)')
parser.add_argument(
'--first_batches_to_skip',
type=int,
default=1,
help='Number of first batches to skip for profiling. '
'(default: %(default)d)')
parser.add_argument(
'--print_train_acc',
action='store_true',
help='If set, output training accuracy.')
parser.add_argument(
'--sorted_key',
type=str,
default='total',
choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
help='Different types of time to sort the profiling report. '
'(default: %(default)s)')
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def profile(args):
"""profile the training process.
"""
if not args.first_batches_to_skip < args.max_batch_num:
raise ValueError("arg 'first_batches_to_skip' must be smaller than "
"'max_batch_num'.")
if not args.first_batches_to_skip >= 0:
raise ValueError(
"arg 'first_batches_to_skip' must not be smaller than 0.")
_, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
data_reader = reader.DataReader(args.feature_lst, args.label_lst)
data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
sorted_key = None if args.sorted_key == 'None' else args.sorted_key
with profiler.profiler(args.device, sorted_key) as prof:
frames_seen, start_time = 0, 0.0
for batch_id, batch_data in enumerate(
data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
if batch_id >= args.max_batch_num:
break
if args.first_batches_to_skip == batch_id:
profiler.reset_profiler()
start_time = time.time()
frames_seen = 0
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
frames_seen += lod[-1]
outs = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if args.print_train_acc:
print("Batch %d acc: %f" %
(batch_id, lodtensor_to_ndarray(outs[1])[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
time_consumed = time.time() - start_time
frames_per_sec = frames_seen / time_consumed
print("\nTime consumed: %f s, performance: %f frames/s." %
(time_consumed, frames_per_sec))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
profile(args)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
def parse_args():
parser = argparse.ArgumentParser("Training for stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The number of sequences in a batch. (default: %(default)d)')
parser.add_argument(
'--minimum_batch_size',
type=int,
default=1,
help='The minimum number of sequences in a batch. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstm layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Projection size of the lstm unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstm unit. (default: %(default)d)')
parser.add_argument(
'--pass_num',
type=int,
default=100,
help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--print_per_batches',
type=int,
default=100,
help='Interval to print training accuracy. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--train_feature_lst',
type=str,
default='data/feature.lst',
help='feature list path for training.')
parser.add_argument(
'--train_label_lst',
type=str,
default='data/label.lst',
help='label list path for training.')
parser.add_argument(
'--val_feature_lst',
type=str,
default='data/val_feature.lst',
help='feature list path for validation.')
parser.add_argument(
'--val_label_lst',
type=str,
default='data/val_label.lst',
help='label list path for validation.')
parser.add_argument(
'--model_save_dir',
type=str,
default='./checkpoints',
help="directory to save model. Do not save the model if set to ''.")
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def train(args):
"""train in loop.
"""
# prediction, avg_cost, accuracy = stacked_lstmp_model(args.hidden_dim,
# args.proj_dim, args.stacked_num, class_num=1749, args.parallel)
prediction, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
# program for test
test_program = fluid.default_main_program().clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program([avg_cost, accuracy])
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
# validation
def test(exe):
# If test data not found, return invalid cost and accuracy
if not (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader
test_data_reader = reader.DataReader(args.val_feature_lst,
args.val_label_lst)
test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
test_costs.append(lodtensor_to_ndarray(cost)[0])
test_accs.append(lodtensor_to_ndarray(acc)[0])
return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.DataReader(args.train_feature_lst,
args.train_label_lst)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num):
pass_start_time = time.time()
for batch_id, batch_data in enumerate(
train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if batch_id > 0 and (batch_id % args.print_per_batches == 0):
print("\nBatch %d, train cost: %f, train acc: %f" %
(batch_id, lodtensor_to_ndarray(cost)[0],
lodtensor_to_ndarray(acc)[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
# run test
val_cost, val_acc = test(exe)
# save model
if args.model_save_dir != '':
model_path = os.path.join(
args.model_save_dir, "deep_asr.pass_" + str(pass_id) + ".model")
fluid.io.save_inference_model(model_path, ["feature"],
[prediction], exe)
# cal pass time
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
# print info at pass end
print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
(pass_id, time_consumed, val_cost, val_acc))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.model_save_dir != '' and not os.path.exists(args.model_save_dir):
os.mkdir(args.model_save_dir)
train(args)
"""
A set of tools for generating adversarial examples on the PaddlePaddle platform
"""
from . import attacks
from . import models
from .adversary import Adversary
"""
Defines a class that contains the original object, the target and the
adversarial example.
"""
class Adversary(object):
"""
Adversary contains the original object, the target and the adversarial
example.
"""
def __init__(self, original, original_label=None):
"""
:param original: The original instance, such as an image.
:param original_label: The original instance's label.
"""
assert original is not None
self.__original = original
self.__original_label = original_label
self.__target_label = None
self.__target = None
self.__is_targeted_attack = False
self.__adversarial_example = None
self.__adversarial_label = None
def set_target(self, is_targeted_attack, target=None, target_label=None):
"""
Set the attack to be targeted or untargeted.
:param is_targeted_attack: bool
:param target: The target.
:param target_label: If is_targeted_attack is true and target_label is
None, self.target_label will be set by the Attack class.
If is_targeted_attack is false, target_label must be None.
"""
assert (target_label is None) or is_targeted_attack
self.__is_targeted_attack = is_targeted_attack
self.__target_label = target_label
self.__target = target
if not is_targeted_attack:
self.__target_label = None
self.__target = None
def set_original(self, original, original_label=None):
"""
Reset the original.
:param original: Original instance.
:param original_label: Original instance's label.
"""
if original != self.__original:
self.__original = original
self.__original_label = original_label
self.__adversarial_example = None
if original is None:
self.__original_label = None
def _is_successful(self, adversarial_label):
"""
Check whether adversarial_label is the expected adversarial label.
:param adversarial_label: adversarial label.
:return: bool
"""
if self.__target_label is not None:
return adversarial_label == self.__target_label
else:
return (adversarial_label is not None) and \
(adversarial_label != self.__original_label)
def is_successful(self):
"""
Whether an adversarial example has been found.
:return: bool
"""
return self._is_successful(self.__adversarial_label)
def try_accept_the_example(self, adversarial_example, adversarial_label):
"""
If adversarial_label is the label we are looking for,
the adversarial_example and adversarial_label will be accepted and
True will be returned.
:return: bool
"""
assert adversarial_example is not None
assert self.__original.shape == adversarial_example.shape
ok = self._is_successful(adversarial_label)
if ok:
self.__adversarial_example = adversarial_example
self.__adversarial_label = adversarial_label
return ok
def perturbation(self, multiplying_factor=1.0):
"""
The perturbation that was added to the original to produce the adversarial_example.
:param multiplying_factor: float.
:return: The perturbation that is multiplied by multiplying_factor.
"""
assert self.__original is not None
assert self.__adversarial_example is not None
return multiplying_factor * (
self.__adversarial_example - self.__original)
@property
def is_targeted_attack(self):
"""
:property: is_targeted_attack
"""
return self.__is_targeted_attack
@property
def target_label(self):
"""
:property: target_label
"""
return self.__target_label
@target_label.setter
def target_label(self, label):
"""
:property: target_label
"""
self.__target_label = label
@property
def target(self):
"""
:property: target
"""
return self.__target
@property
def original(self):
"""
:property: original
"""
return self.__original
@property
def original_label(self):
"""
:property: original_label
"""
return self.__original_label
@original_label.setter
def original_label(self, label):
"""
original_label setter
"""
self.__original_label = label
@property
def adversarial_example(self):
"""
:property: adversarial_example
"""
return self.__adversarial_example
@adversarial_example.setter
def adversarial_example(self, example):
"""
adversarial_example setter
"""
self.__adversarial_example = example
@property
def adversarial_label(self):
"""
:property: adversarial_label
"""
return self.__adversarial_label
@adversarial_label.setter
def adversarial_label(self, label):
"""
adversarial_label setter
"""
self.__adversarial_label = label
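A brief usage sketch of the Adversary container (the image shape and labels are placeholders): it only stores the original instance, the optional target, and whatever example an Attack later accepts.

import numpy as np
from advbox import Adversary

original = np.zeros((1, 28, 28), dtype="float32")  # e.g. an MNIST image
adversary = Adversary(original, original_label=7)
# Untargeted by default; to request a specific misclassification instead:
adversary.set_target(is_targeted_attack=True, target_label=1)
# An Attack subclass later calls try_accept_the_example(...), and
# is_successful() reports whether the accepted label matches this goal.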
"""
Attack methods
"""
from .base import Attack
from .deepfool import DeepFoolAttack
from .gradientsign import FGSM
from .gradientsign import GradientSignAttack
from .iterator_gradientsign import IFGSM
from .iterator_gradientsign import IteratorGradientSignAttack
"""
The base class for attack methods.
"""
from abc import ABCMeta, abstractmethod
import logging
from abc import ABCMeta
from abc import abstractmethod
import numpy as np
class Attack(object):
"""
Abstract base class for adversarial attacks. `Attack` represent an adversarial attack
which search an adversarial example. subclass should implement the _apply() method.
Abstract base class for adversarial attacks. `Attack` represents an
adversarial attack which searches for an adversarial example. A subclass
should implement the _apply() method.
Args:
model(Model): an instance of the class advbox.base.Model.
@@ -18,22 +23,50 @@ class Attack(object):
def __init__(self, model):
self.model = model
def __call__(self, image_label):
def __call__(self, adversary, **kwargs):
"""
Generate the adversarial sample.
Args:
image_label(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
adv_img = self._apply(image_label)
return adv_img
self._preprocess(adversary)
return self._apply(adversary, **kwargs)
@abstractmethod
def _apply(self, image_label):
def _apply(self, adversary, **kwargs):
"""
Search an adversarial example.
Args:
image_batch(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
raise NotImplementedError
def _preprocess(self, adversary):
"""
Preprocess the adversary object.
:param adversary: adversary
:return: None
"""
if adversary.original_label is None:
adversary.original_label = np.argmax(
self.model.predict(adversary.original))
if adversary.is_targeted_attack and adversary.target_label is None:
if adversary.target is None:
raise ValueError(
'When adversary.is_targeted_attack is True, '
'adversary.target_label or adversary.target must be set.')
else:
adversary.target_label = np.argmax(
self.model.predict(adversary.target))
logging.info('adversary:\noriginal_label: {}'
'\n target_label: {}'
'\n is_targeted_attack: {}'
''.format(adversary.original_label, adversary.target_label,
adversary.is_targeted_attack))
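To illustrate the contract the base class defines, here is a toy subclass sketch (assuming it lives next to base.py inside advbox/attacks): __call__ first runs _preprocess to fill in missing labels, then hands the adversary to _apply.

import numpy as np

from .base import Attack


class NoOpAttack(Attack):
    """Toy attack: proposes the unmodified original as the candidate example,
    so it can only succeed for a targeted attack that already matches."""

    def _apply(self, adversary, **kwargs):
        candidate = np.copy(adversary.original)
        label = np.argmax(self.model.predict(candidate))
        adversary.try_accept_the_example(candidate, label)
        return adversary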
"""
This module provides the DeepFool attack method. DeepFool is a simple and
accurate adversarial attack.
"""
from __future__ import division
import logging
import numpy as np
from .base import Attack
class DeepFoolAttack(Attack):
"""
"DeepFool: a simple and accurate method to fool deep neural networks",
Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard,
https://arxiv.org/abs/1511.04599
"""
def _apply(self, adversary, iterations=100, overshoot=0.02):
"""
Apply the deep fool attack.
Args:
adversary(Adversary): The Adversary object.
iterations(int): The maximum number of iterations.
overshoot(float): We add (1+overshoot)*pert every iteration.
Return:
adversary: The Adversary object.
"""
assert adversary is not None
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
f = self.model.predict(adversary.original)
if adversary.is_targeted_attack:
labels = [adversary.target_label]
else:
max_class_count = 10
class_count = self.model.num_classes()
if class_count > max_class_count:
labels = np.argsort(f)[-(max_class_count + 1):-1]
else:
labels = np.arange(class_count)
gradient = self.model.gradient(adversary.original, pre_label)
x = adversary.original
for iteration in xrange(iterations):
w = np.inf
w_norm = np.inf
pert = np.inf
for k in labels:
if k == pre_label:
continue
gradient_k = self.model.gradient(x, k)
w_k = gradient_k - gradient
f_k = f[k] - f[pre_label]
w_k_norm = np.linalg.norm(w_k) + 1e-8
pert_k = (np.abs(f_k) + 1e-8) / w_k_norm
if pert_k < pert:
pert = pert_k
w = w_k
w_norm = w_k_norm
r_i = -w * pert / w_norm # The gradient is -gradient in the paper.
x = x + (1 + overshoot) * r_i
x = np.clip(x, min_, max_)
f = self.model.predict(x)
gradient = self.model.gradient(x, pre_label)
adv_label = np.argmax(f)
logging.info('iteration = {}, f = {}, pre_label = {}'
', adv_label={}'.format(iteration, f[pre_label],
pre_label, adv_label))
if adversary.try_accept_the_example(x, adv_label):
return adversary
return adversary
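A usage sketch for the attack above (model, image and label are placeholders for a wrapped, trained classifier such as the PaddleModel defined later, an input array and its ground-truth label):

from advbox import Adversary
from advbox.attacks.deepfool import DeepFoolAttack

attack = DeepFoolAttack(model)
adversary = Adversary(image, original_label=label)
adversary = attack(adversary, iterations=100, overshoot=0.02)
if adversary.is_successful():
    perturbed = adversary.adversarial_example
    print('fooled: label %d -> %d' % (label, adversary.adversarial_label))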
@@ -2,37 +2,59 @@
This module provides the implementation of the FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
class GradientSignAttack(Attack):
"""
This attack was originally implemented by Goodfellow et al. (2015) with the
infinity norm (and is known as the "Fast Gradient Sign Method"). This is therefore called
the Fast Gradient Method.
infinity norm (and is known as the "Fast Gradient Sign Method").
This is therefore called the Fast Gradient Method.
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, image_label, epsilons=1000):
assert len(image_label) == 1
pre_label = np.argmax(self.model.predict(image_label))
def _apply(self, adversary, epsilons=1000):
"""
Apply the gradient sign attack.
Args:
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
Return:
adversary: The Adversary object.
"""
assert adversary is not None
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
gradient = self.model.gradient(image_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
for epsilon in epsilons:
adv_img = image_label[0][0].reshape(
gradient_sign.shape) + epsilon * gradient_sign
adv_img = adversary.original + epsilon * gradient_sign
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
if pre_label != adv_label:
return adv_img
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
FGSM = GradientSignAttack
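And the corresponding usage sketch for FGSM (again with placeholder model/image/label; passing epsilons=100 scans 100 evenly spaced values in (0, 1], per the linspace above):

from advbox import Adversary
from advbox.attacks.gradientsign import FGSM

attack = FGSM(model)  # `model` is a wrapped classifier, e.g. a PaddleModel
adversary = Adversary(image, original_label=label)
adversary = attack(adversary, epsilons=100)
if adversary.is_successful():
    print('adversarial label: %d' % adversary.adversarial_label)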
@@ -2,8 +2,12 @@
This module provides the implementation of the iterative FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
@@ -13,31 +17,43 @@ class IteratorGradientSignAttack(Attack):
Paper link: https://arxiv.org/pdf/1607.02533.pdf
"""
def _apply(self, image_label, epsilons=100, steps=10):
def _apply(self, adversary, epsilons=100, steps=10):
"""
Apply the iterative gradient sign attack.
Args:
image_label(list): The image and label tuple list of one element.
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
steps(int): The number of iteration steps.
Return:
numpy.ndarray: The adversarail sample generated by the algorithm.
adversary(Adversary): The Adversary object.
"""
assert len(image_label) == 1
pre_label = np.argmax(self.model.predict(image_label))
gradient = self.model.gradient(image_label)
min_, max_ = self.model.bounds()
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)
epsilons = np.linspace(0, 1 / steps, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
for epsilon in epsilons:
adv_img = image_label[0][0].reshape(gradient.shape)
adv_img = adversary.original
for _ in range(steps):
gradient = self.model.gradient([(adv_img, image_label[0][1])])
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + epsilon * gradient_sign
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + gradient_sign * epsilon
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
if pre_label != adv_label:
return adv_img
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
IFGSM = IteratorGradientSignAttack
"""
Paddle model for the target of attack
"""
from .base import Model
from .paddle import PaddleModel
@@ -2,21 +2,21 @@
The base class of models.
"""
from abc import ABCMeta
import abc
from abc import abstractmethod
abstractmethod = abc.abstractmethod
import numpy as np
class Model(object):
"""
Base class of model to provide attack.
Args:
bounds(tuple): The lower and upper bound for the image pixel.
channel_axis(int): The index of the axis that represents the color channel.
preprocess(tuple): Two element tuple used to preprocess the input. First
substract the first element, then divide the second element.
channel_axis(int): The index of the axis that represents the color
channel.
preprocess(tuple): Two element tuple used to preprocess the input.
First subtract the first element, then divide by the second element.
"""
__metaclass__ = ABCMeta
@@ -43,25 +43,32 @@ class Model(object):
return self._channel_axis
def _process_input(self, input_):
res = input_
res = None
sub, div = self._preprocess
if sub != 0:
if np.any(sub != 0):
res = input_ - sub
assert div != 0
if div != 1:
res /= div
assert np.any(div != 0)
if np.any(div != 1):
if res is None: # "res = input_ - sub" is not executed!
res = input_ / div
else:
res /= div
if res is None: # "res = (input_ - sub)/ div" is not executed!
return input_
return res
@abstractmethod
def predict(self, image_batch):
def predict(self, data):
"""
Calculate the prediction of the image batch.
Calculate the prediction of the data.
Args:
image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).
data(numpy.ndarray): input data with shape (size,
height, width, channels).
Return:
numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
numpy.ndarray: predictions of the data with shape (batch_size,
num_of_classes).
"""
raise NotImplementedError
@@ -76,15 +83,17 @@ class Model(object):
raise NotImplementedError
@abstractmethod
def gradient(self, image_batch):
def gradient(self, data, label):
"""
Calculate the gradient of the cross-entropy loss w.r.t the image.
Args:
image_batch(list): The image and label tuple list.
data(numpy.ndarray): input data with shape (size, height, width,
channels).
label(int): Label used to calculate the gradient.
Return:
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image with
the shape (height, width, channel).
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image
with the shape (height, width, channel).
"""
raise NotImplementedError
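The preprocess tuple therefore acts as a plain (subtract, divide) normalization applied before prediction and gradient computation; a self-contained sketch of the equivalent arithmetic (the function name and sample values are hypothetical):

import numpy as np

def process_input(x, preprocess=(0, 1)):
    # Same arithmetic as Model._process_input: subtract `sub`, then divide by
    # `div`, skipping whichever step is a no-op.
    sub, div = preprocess
    return (x - sub) / div

x = np.array([0.0, 127.5, 255.0], dtype="float32")
print(process_input(x, preprocess=(127.5, 127.5)))  # -> [-1., 0., 1.]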
"""
Paddle model
"""
from __future__ import absolute_import
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from paddle.v2.fluid.framework import program_guard
from .base import Model
@@ -11,10 +12,12 @@ from .base import Model
class PaddleModel(Model):
"""
Create a PaddleModel instance.
When you need to generate a adversarial sample, you should construct an instance of PaddleModel.
When you need to generate an adversarial sample, you should construct an
instance of PaddleModel.
Args:
program(paddle.v2.fluid.framework.Program): The program of the model which generate the adversarial sample.
program(paddle.v2.fluid.framework.Program): The program of the model
which generates the adversarial sample.
input_name(string): The name of the input.
logits_name(string): The name of the logits.
predict_name(string): The name of the predict.
......@@ -30,12 +33,12 @@ class PaddleModel(Model):
bounds,
channel_axis=3,
preprocess=None):
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
if preprocess is None:
preprocess = (0, 1)
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
self._program = program
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
......@@ -49,30 +52,36 @@ class PaddleModel(Model):
loss = self._program.block(0).var(self._cost_name)
param_grads = fluid.backward.append_backward(
loss, parameter_list=[self._input_name])
self._gradient = dict(param_grads)[self._input_name]
self._gradient = filter(lambda p: p[0].name == self._input_name,
param_grads)[0][1]
def predict(self, image_batch):
def predict(self, data):
"""
Predict the label of the image_batch.
Calculate the prediction of the data.
Args:
image_batch(list): The image and label tuple list.
Return:
numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
Args:
data(numpy.ndarray): input data with shape (size,
height, width, channels).
Return:
numpy.ndarray: predictions of the data with shape (batch_size,
num_of_classes).
"""
scaled_data = self._process_input(data)
feeder = fluid.DataFeeder(
feed_list=[self._input_name, self._logits_name],
place=self._place,
program=self._program)
predict_var = self._program.block(0).var(self._predict_name)
predict = self._exe.run(self._program,
feed=feeder.feed(image_batch),
feed=feeder.feed([(scaled_data, 0)]),
fetch_list=[predict_var])
predict = np.squeeze(predict, axis=0)
return predict
def num_classes(self):
"""
Calculate the number of classes of the output label.
Return:
int: the number of classes
......@@ -81,21 +90,27 @@ class PaddleModel(Model):
assert len(predict_var.shape) == 2
return predict_var.shape[1]
def gradient(self, image_batch):
def gradient(self, data, label):
"""
Calculate the gradient of the loss w.r.t the input.
Calculate the gradient of the cross-entropy loss w.r.t the image.
Args:
image_batch(list): The image and label tuple list.
data(numpy.ndarray): input data with shape (size, height, width,
channels).
label(int): Label used to calculate the gradient.
Return:
list: The list of the gradient of the image.
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image
with the shape (height, width, channel).
"""
scaled_data = self._process_input(data)
feeder = fluid.DataFeeder(
feed_list=[self._input_name, self._logits_name],
place=self._place,
program=self._program)
grad, = self._exe.run(self._program,
feed=feeder.feed(image_batch),
feed=feeder.feed([(scaled_data, label)]),
fetch_list=[self._gradient])
return grad
return grad.reshape(data.shape)
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import matplotlib.pyplot as plt
import numpy as np
from advbox.models.paddle import PaddleModel
from advbox import Adversary
from advbox.attacks.gradientsign import GradientSignAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
......@@ -18,7 +18,7 @@ def cnn_model(img):
Returns:
Variable: the label prediction
"""
#conv1 = fluid.nets.conv2d()
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
......@@ -76,10 +76,11 @@ def main():
att = GradientSignAttack(m)
for data in train_reader():
# fgsm attack
adv_img = att(data)
plt.imshow(n[0][0], cmap='Greys_r')
plt.show()
#np.save('adv_img', adv_img)
adversary = att(Adversary(data[0][0], data[0][1]))
if adversary.is_successful():
plt.imshow(adversary.target, cmap='Greys_r')
plt.show()
# np.save('adv_img', adversary.target)
break
......
# SE-ResNeXt for image classification
This model, built with PaddlePaddle Fluid, is still under active development and
is not the final version. We welcome feedback.
import os
import math
import random
import functools
import numpy as np
......@@ -7,10 +8,6 @@ from PIL import Image, ImageEnhance
random.seed(0)
_R_MEAN = 123.0
_G_MEAN = 117.0
_B_MEAN = 104.0
DATA_DIM = 224
THREAD = 8
......@@ -20,7 +17,8 @@ DATA_DIR = 'ILSVRC2012'
TRAIN_LIST = 'ILSVRC2012/train_list.txt'
TEST_LIST = 'ILSVRC2012/test_list.txt'
img_mean = np.array([_R_MEAN, _G_MEAN, _B_MEAN]).reshape((3, 1, 1))
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
......@@ -46,6 +44,36 @@ def crop_image(img, target_size, center):
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = random.randint(0, img.size[0] - w)
j = random.randint(0, img.size[1] - h)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.LANCZOS)
return img
def rotate_image(img):
angle = random.randint(-10, 10)
img = img.rotate(angle)
return img
def distort_color(img):
def random_brightness(img, lower=0.5, upper=1.5):
e = random.uniform(lower, upper)
......@@ -69,25 +97,28 @@ def distort_color(img):
return img
def process_image(sample, mode):
def process_image(sample, mode, color_jitter, rotate):
img_path = sample[0]
img = Image.open(img_path)
if mode == 'train':
img = resize_short(img, DATA_DIM + 32)
if rotate: img = rotate_image(img)
img = random_crop(img, DATA_DIM)
else:
img = resize_short(img, DATA_DIM)
img = crop_image(img, target_size=DATA_DIM, center=(mode != 'train'))
img = crop_image(img, target_size=DATA_DIM, center=True)
if mode == 'train':
img = distort_color(img)
if color_jitter:
img = distort_color(img)
if random.randint(0, 1) == 1:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1))
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'test':
return img, sample[1]
......@@ -95,7 +126,11 @@ def process_image(sample, mode):
return img
def _reader_creator(file_list, mode, shuffle=False):
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False):
def reader():
with open(file_list) as flist:
lines = [line.strip() for line in flist]
......@@ -110,13 +145,15 @@ def _reader_creator(file_list, mode, shuffle=False):
img_path = os.path.join(DATA_DIR, line)
yield [img_path]
mapper = functools.partial(process_image, mode=mode)
mapper = functools.partial(
process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def train():
return _reader_creator(TRAIN_LIST, 'train', shuffle=True)
return _reader_creator(
TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True)
def test():
......
import os
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import reader
......@@ -35,7 +34,11 @@ def squeeze_excitation(input, num_channels, reduction_ratio):
def shortcut(input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 3, stride)
if stride == 1:
filter_size = 1
else:
filter_size = 3
return conv_bn_layer(input, ch_out, filter_size, stride)
else:
return input
......@@ -75,7 +78,7 @@ def SE_ResNeXt(input, class_dim, infer=False):
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_type='max')
input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
......@@ -96,7 +99,11 @@ def SE_ResNeXt(input, class_dim, infer=False):
return out
def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
def train(learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model'):
class_dim = 1000
image_shape = [3, 224, 224]
......@@ -109,9 +116,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate / batch_size,
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4 * batch_size))
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=out, label=label)
......@@ -125,6 +132,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables_if_exist(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
......@@ -141,16 +151,18 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
test_accuracy.reset(exe)
for data in test_reader():
out, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost] + test_accuracy.metrics)
loss, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost] + test_accuracy.metrics)
test_pass_acc = test_accuracy.eval(exe)
print("End pass {0}, train_acc {1}, test_acc {2}".format(
pass_id, pass_acc, test_pass_acc))
model_path = os.path.join(model_save_dir, str(pass_id))
fluid.io.save_inference_model(model_path, ['image'], [out], exe)
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
if __name__ == '__main__':
train(learning_rate=0.1, batch_size=7, num_passes=100)
train(learning_rate=0.1, batch_size=8, num_passes=100, init_model=None)
import os
import cv2
import numpy as np
from PIL import Image
from paddle.v2.image import load_image
class DataGenerator(object):
def __init__(self):
pass
def train_reader(self, img_root_dir, img_label_list, batchsize):
'''
Reader interface for training.
:param img_root_dir: The root path of the images for training.
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for training.
:type img_label_list: str
'''
img_label_lines = []
if batchsize == 1:
to_file = "tmp.txt"
cmd = "cat " + img_label_list + " | awk '{print $1,$2,$3,$4;}' | shuf > " + to_file
print "cmd: " + cmd
os.system(cmd)
print "finish batch shuffle"
img_label_lines = open(to_file, 'r').readlines()
else:
to_file = "tmp.txt"
#cmd1: partial shuffle
cmd = "cat " + img_label_list + " | awk '{printf(\"%04d%.4f %s\\n\", $1, rand(), $0)}' | sort | sed 1,$((1 + RANDOM % 100))d | "
#cmd2: batch merge and shuffle
cmd += "awk '{printf $2\" \"$3\" \"$4\" \"$5\" \"; if(NR % " + str(
batchsize) + " == 0) print \"\";}' | shuf | "
#cmd3: batch split
cmd += "awk '{if(NF == " + str(
batchsize
) + " * 4) {for(i = 0; i < " + str(
batchsize
) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
print "cmd: " + cmd
os.system(cmd)
print "finish batch shuffle"
img_label_lines = open(to_file, 'r').readlines()
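# Note on the shell pipeline above ("batch shuffle"):
#   cmd1 prefixes every line with a key built from the image height and a
#        random number, sorts on that key (so images with similar heights end
#        up next to each other) and drops a random number of leading lines so
#        batch boundaries differ between runs;
#   cmd2 strips the key, packs every `batchsize` consecutive samples into one
#        long line and shuffles those packed lines;
#   cmd3 unpacks each packed line back into `batchsize` individual samples.
# As a result, samples within a batch have similar heights (the reader below
# resizes every image in a batch to the size of the first one), while the
# order of the batches is random.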
def reader():
sizes = len(img_label_lines) / batchsize
for i in range(sizes):
result = []
sz = [0, 0]
for j in range(batchsize):
line = img_label_lines[i * batchsize + j]
# h, w, img_name, labels
items = line.split(' ')
label = [int(c) for c in items[-1].split(',')]
img = Image.open(os.path.join(img_root_dir, items[
2])).convert('L')  # convert to grayscale
if j == 0:
sz = img.size
img = img.resize((sz[0], sz[1]))
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
result.append([img, label])
yield result
return reader
def test_reader(self, img_root_dir, img_label_list):
'''
Reader interface for inference.
:param img_root_dir: The root path of the images for testing.
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for testing.
:type img_label_list: str
'''
def reader():
for line in open(img_label_list):
# h, w, img_name, labels
items = line.split(' ')
label = [int(c) for c in items[-1].split(',')]
img = Image.open(os.path.join(img_root_dir, items[2])).convert(
'L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
yield img, label
return reader
# Text Classification
## Data Preparation
```
wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
tar zxf aclImdb_v1.tar.gz
```
## Training
```
python train.py --dict_path 'aclImdb/imdb.vocab'
```
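The `--dict_path` argument points to a plain-text vocabulary with one token per line; the training script maps each token to its line index and reserves one extra id for out-of-vocabulary words (see `load_vocab` and `main` in `train.py` below). A minimal sketch of that behaviour, assuming the `aclImdb/imdb.vocab` path from the command above:
```
vocab = {}
with open('aclImdb/imdb.vocab') as f:
    for idx, line in enumerate(f):
        vocab[line.strip()] = idx   # the line number becomes the word id
vocab['<unk>'] = len(vocab)         # extra id for out-of-vocabulary words
print('The dictionary size is : %d' % len(vocab))
```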
class TrainConfig(object):
# Whether to use GPU in training or not.
use_gpu = False
# The training batch size.
batch_size = 4
# The epoch number.
num_passes = 30
# The global learning rate.
learning_rate = 0.01
# Training log will be printed every log_period.
log_period = 100
import numpy as np
import sys
import os
import argparse
import time
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from config import TrainConfig as conf
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--dict_path',
type=str,
required=True,
help="Path of the word dictionary.")
return parser.parse_args()
# Define to_lodtensor function to process the sequential data.
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
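# For example, a batch of three sequences with lengths 3, 1 and 2 produces
# lod = [0, 3, 4, 6] (cumulative offsets) and a flattened int64 tensor of
# shape (6, 1); res.set_lod([lod]) attaches the offsets as the level-0 LoD.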
# Load the dictionary.
def load_vocab(filename):
vocab = {}
with open(filename) as f:
for idx, line in enumerate(f):
vocab[line.strip()] = idx
return vocab
# Define the convolution model.
def conv_net(dict_dim,
window_size=3,
emb_dim=128,
num_filters=128,
fc0_dim=96,
class_dim=2):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=num_filters,
filter_size=window_size,
act="tanh",
pool_type="max")
fc_0 = fluid.layers.fc(input=[conv_3], size=fc0_dim)
prediction = fluid.layers.fc(input=[fc_0], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return data, label, prediction, avg_cost
def main(dict_path):
word_dict = load_vocab(dict_path)
word_dict["<unk>"] = len(word_dict)
dict_dim = len(word_dict)
print("The dictionary size is : %d" % dict_dim)
data, label, prediction, avg_cost = conv_net(dict_dim)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=conf.learning_rate)
sgd_optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
test_target = accuracy.metrics + accuracy.states
inference_program = fluid.io.get_inference_program(test_target)
# The training data set.
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=51200),
batch_size=conf.batch_size)
# The testing data set.
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.test(word_dict), buf_size=51200),
batch_size=conf.batch_size)
if conf.use_gpu:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program())
def test(exe):
accuracy.reset(exe)
for batch_id, data in enumerate(test_reader()):
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
acc = exe.run(inference_program,
feed={"words": input_seq,
"label": y_data})
test_acc = accuracy.eval(exe)
return test_acc
total_time = 0.
for pass_id in xrange(conf.num_passes):
accuracy.reset(exe)
start_time = time.time()
for batch_id, data in enumerate(train_reader()):
cost_val, acc_val = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, accuracy.metrics[0]])
pass_acc = accuracy.eval(exe)
if batch_id and batch_id % conf.log_period == 0:
print("Pass id: %d, batch id: %d, cost: %f, pass_acc %f" %
(pass_id, batch_id, cost_val, pass_acc))
end_time = time.time()
total_time += (end_time - start_time)
pass_test_acc = test(exe)
print("Pass id: %d, test_acc: %f" % (pass_id, pass_test_acc))
print("Total train time: %f" % (total_time))
if __name__ == '__main__':
args = parse_args()
main(args.dict_path)
......@@ -106,7 +106,6 @@ Some important parameters of the NCE layer are explained below:
| param\_attr / bias\_attr | Used to set the parameter names. | Makes it convenient to load the parameters at prediction time; this is covered in the Prediction section. |
| num\_neg\_samples | Number of negative samples. | Controls the ratio of positive to negative samples; the valid range is [1, dictionary size - 1]. More negative samples make training slower but increase model accuracy. |
| neg\_distribution | Distribution used to generate negative labels; uniform by default. | Lets you control the per-class sampling weights when drawing negative samples. For example, if the positive label is "sunny" and the negative label "flood" should be distinguished more carefully during training, you can increase the sampling weight of the "flood" class. |
| act | Which activation function to use. | According to the principle of NCE, the sigmoid function should be used here. |
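A rough sketch of how these arguments are typically passed to `paddle.layer.nce` (the layer names and sizes below are illustrative placeholders, not taken from the model in this example):
```
import paddle.v2 as paddle

dict_size = 1000  # hypothetical vocabulary size

word = paddle.layer.data(
    name="word", type=paddle.data_type.integer_value(dict_size))
label = paddle.layer.data(
    name="label", type=paddle.data_type.integer_value(dict_size))
emb = paddle.layer.embedding(input=word, size=32)
hidden = paddle.layer.fc(input=emb, size=64, act=paddle.activation.Tanh())

cost = paddle.layer.nce(
    input=hidden,
    label=label,
    num_classes=dict_size,
    param_attr=paddle.attr.Param(name="nce_w"),
    bias_attr=paddle.attr.Param(name="nce_b"),
    num_neg_samples=5,                # number of negatives drawn per positive
    neg_distribution=None,            # None means a uniform distribution
    act=paddle.activation.Sigmoid())  # sigmoid, as NCE requires
```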
## Prediction
1. Run the following from the command line:
......
......@@ -82,7 +82,7 @@ SSD uses a convolutional neural network for "end-to-end" detection: the input is the original
The file has two fields: the first is the relative path of an image file, and the second is the relative path of the corresponding annotation file.
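For illustration only, a file list in this format might look like the following (the paths and extensions are hypothetical):
```
images/000001.jpg annotations/000001.xml
images/000002.jpg annotations/000002.xml
```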
### Preparing the pre-trained model
Download the pre-trained VGG-16 model. We provide a converted model; after downloading it, place it at ```vgg/vgg_model.tar.gz```.
Download the pre-trained VGG-16 model. We provide a converted model; download it from [http://paddlepaddle.bj.bcebos.com/model_zoo/detection/ssd_model/vgg_model.tar.gz](http://paddlepaddle.bj.bcebos.com/model_zoo/detection/ssd_model/vgg_model.tar.gz) and place it at ```vgg/vgg_model.tar.gz```.
### Model training
Run ```python train.py``` to start training. Note that this example only supports a CUDA GPU environment and cannot run on CPU, mainly because training on CPU is very slow; in practice GPUs are generally used for image tasks. This implementation hard-codes the use of cuDNN and does not provide a CPU version. Some key execution logic of ```train.py```:
......
......@@ -46,7 +46,7 @@ def train(topology,
word_dict = paddle.dataset.imdb.word_dict()
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=1000),
lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=51200),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict)(), batch_size=100)
......@@ -83,16 +83,14 @@ def train(topology,
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.train_reader(train_data_dir, word_dict, lbl_dict),
buf_size=1000),
buf_size=51200),
batch_size=batch_size)
if test_data_dir is not None:
# here, because training and testing data share a same format,
# we still use the reader.train_reader to read the testing data.
test_reader = paddle.batch(
paddle.reader.shuffle(
reader.train_reader(test_data_dir, word_dict, lbl_dict),
buf_size=1000),
reader.train_reader(test_data_dir, word_dict, lbl_dict),
batch_size=batch_size)
else:
test_reader = None
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import argparse
import os
import cPickle
from utils import logger
"""
This script will output 2 files:
1. feature_dict.pkl
2. item_freq.pkl
"""
class FeatureGenerator(object):
"""
Encode feature values with low-frequency filtering.
"""
def __init__(self, feat_appear_limit=20):
"""
@feat_appear_limit: int
"""
self._dic = None # feature value --> id
self._count = None # numbers of appearances of feature values
self._feat_appear_limit = feat_appear_limit
def add_feat_val(self, feat_val):
"""
Add feature values and count numbers of its appearance.
"""
if self._count is None:
self._count = {'<unk>': 0}
if feat_val == "NULL":
feat_val = '<unk>'
if feat_val not in self._count:
self._count[feat_val] = 1
else:
self._count[feat_val] += 1
self._count['<unk>'] += 1
def _filter_feat(self):
"""
Filter low-frequency feature values.
"""
self._items = filter(lambda x: x[1] > self._feat_appear_limit,
self._count.items())
self._items.sort(key=lambda x: x[1], reverse=True)
def _build_dict(self):
"""
Build feature values --> ids dict.
"""
self._dic = {}
self._filter_feat()
for i in xrange(len(self._items)):
self._dic[self._items[i][0]] = i
self.dim = len(self._dic)
def get_feat_id(self, feat_val):
"""
Get id of feature value after encoding.
"""
# build dict
if self._dic is None:
self._build_dict()
# find id
if feat_val in self._dic:
return self._dic[feat_val]
else:
return self._dic['<unk>']
def get_dim(self):
"""
Get dim.
"""
# build dict
if self._dic is None:
self._build_dict()
return len(self._dic)
def get_dict(self):
"""
Get dict.
"""
# build dict
if self._dic is None:
self._build_dict()
return self._dic
def get_total_count(self):
"""
Compute total num of count.
"""
total_count = 0
for i in xrange(len(self._items)):
feat_val = self._items[i][0]
c = self._items[i][1]
total_count += c
return total_count
def count_iterator(self):
"""
Iterate feature values and its num of appearance.
"""
for i in xrange(len(self._items)):
yield self._items[i][0], self._items[i][1]
def __repr__(self):
"""
"""
return '<FeatureGenerator %d>' % self._dim
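# Usage sketch (values are hypothetical): a FeatureGenerator first accumulates
# counts with add_feat_val() and lazily builds its value -> id dict on the
# first lookup, keeping only values that appear more than feat_appear_limit
# times:
#
#   g = FeatureGenerator(feat_appear_limit=20)
#   for v in raw_city_values:
#       g.add_feat_val(v)
#   city_id = g.get_feat_id("beijing")   # rare/unseen values map to '<unk>'
#   dim = g.get_dim()                    # number of kept feature values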
def scan_build_dict(data_path, features_dict):
"""
Scan the raw data and add all feature values.
"""
logger.info('scan data set')
with open(data_path, 'r') as f:
for (line_id, line) in enumerate(f):
fields = line.strip('\n').split('\t')
user_id = fields[0]
province = fields[1]
features_dict['province'].add_feat_val(province)
city = fields[2]
features_dict['city'].add_feat_val(city)
item_infos = fields[3]
phone = fields[4]
features_dict['phone'].add_feat_val(phone)
for item_info in item_infos.split(";"):
item_info_array = item_info.split(":")
item = item_info_array[0]
features_dict['history_clicked_items'].add_feat_val(item)
features_dict['user_id'].add_feat_val(user_id)
category = item_info_array[1]
features_dict['history_clicked_categories'].add_feat_val(
category)
tags = item_info_array[2]
for tag in tags.split("_"):
features_dict['history_clicked_tags'].add_feat_val(tag)
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--train_set_path',
type=str,
required=True,
help="path of the train set")
parser.add_argument(
'--output_dir', type=str, required=True, help="directory to output")
parser.add_argument(
'--feat_appear_limit',
type=int,
default=20,
help="the minimum number of feature values appears (default: 20)")
return parser.parse_args()
if __name__ == '__main__':
args = parse_args()
# check argument
assert os.path.exists(
args.train_set_path), 'The train set path does not exist.'
# features used
features = [
'user_id', 'province', 'city', 'phone', 'history_clicked_items',
'history_clicked_tags', 'history_clicked_categories'
]
# init feature generators
features_dict = {}
for feature in features:
features_dict[feature] = FeatureGenerator(
feat_appear_limit=args.feat_appear_limit)
# scan data for building dict
scan_build_dict(args.train_set_path, features_dict)
# generate feature_dict.pkl
feature_encoding_dict = {}
for feature in features:
d = features_dict[feature].get_dict()
feature_encoding_dict[feature] = d
logger.info('Feature:%s, dimension is %d' % (feature, len(d)))
output_dict_path = os.path.join(args.output_dir, 'feature_dict.pkl')
with open(output_dict_path, "w") as f:
cPickle.dump(feature_encoding_dict, f, -1)
# generate item_freq.pkl
item_freq_list = []
g = features_dict['history_clicked_items']
total_count = g.get_total_count()
for feat_val, feat_count in g.count_iterator():
item_freq_list.append(float(feat_count) / total_count)
logger.info('item_freq, dimension is %d' % (len(item_freq_list)))
output_item_freq_path = os.path.join(args.output_dir, 'item_freq.pkl')
with open(output_item_freq_path, "w") as f:
cPickle.dump(item_freq_list, f, -1)
logger.info('Complete!')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--infer_set_path',
type=str,
required=True,
help="path of the infer set")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def infer():
"""
infer
"""
args = parse_args()
# check argument
assert os.path.exists(
args.infer_set_path), 'The infer_set_path path does not exist.'
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(reader.infer(args.infer_set_path)):
test_batch.append(item)
if len(test_batch) == args.batch_size:
infer_a_batch(inferer, test_batch, nid_to_word)
test_batch = []
if len(test_batch):
infer_a_batch(inferer, test_batch, nid_to_word)
def infer_a_batch(inferer, test_batch, nid_to_word):
"""
input a batch of data and infer
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(test_batch, probs[0], probs[1])):
softmax_output = res[1]
sort_nid = res[1].argsort()
# print the top 30 recommended items
ret = ""
for j in range(1, 31):
item_id = sort_nid[-1 * j]
item_id_to_word = nid_to_word[item_id]
ret += "%s:%.6f," \
% (item_id_to_word, softmax_output[item_id])
print ret.rstrip(",")
if __name__ == "__main__":
infer()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
return parser.parse_args()
def infer_user():
"""
infer_user
"""
args = parse_args()
# check argument
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
parameters.set('_proj_province', \
np.zeros(shape=parameters.get('_proj_province').shape))
parameters.set('_proj_city', \
np.zeros(shape=parameters.get('_proj_city').shape))
parameters.set('_proj_phone', \
np.zeros(shape=parameters.get('_proj_phone').shape))
parameters.set('_proj_history_clicked_items', \
np.zeros(shape= parameters.get('_proj_history_clicked_items').shape))
parameters.set('_proj_history_clicked_categories', \
np.zeros(shape= parameters.get('_proj_history_clicked_categories').shape))
parameters.set('_proj_history_clicked_tags', \
np.zeros(shape= parameters.get('_proj_history_clicked_tags').shape))
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(
reader.infer_user(['USER_ID_0', 'USER_ID_981', 'USER_ID_310806'])):
test_batch.append(item)
infer_a_batch(inferer, test_batch, nid_to_word)
def infer_a_batch(inferer, test_batch, nid_to_word):
"""
input a batch of data and infer
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(test_batch, probs[0], probs[1])):
softmax_output = res[1]
sort_nid = res[1].argsort()
# print the top 30 recommended items
ret = ""
for j in range(1, 31):
item_id = sort_nid[-1 * j]
item_id_to_word = nid_to_word[item_id]
ret += "%s:%.6f," \
% (item_id_to_word, softmax_output[item_id])
print ret.rstrip(",")
if __name__ == "__main__":
infer_user()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
import math
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
return parser.parse_args()
def get_item_vec_from_softmax(nce_w, nce_b):
"""
get item vectors from softmax parameter
"""
if nce_w is None or nce_b is None:
return None
vector = []
total_items_num = nce_w.shape[0]
if total_items_num != nce_b.shape[1]:
return None
dim_vector = nce_w.shape[1] + 1
for i in range(0, total_items_num):
vector.append([])
vector[i].append(nce_b[0][i])
for j in range(1, dim_vector):
vector[i].append(nce_w[i][j - 1])
return vector
def convt_simple_lsh(vector):
"""
do simple lsh conversion
"""
max_norm = 0
num_of_vec = len(vector)
for i in range(0, num_of_vec):
norm = np.linalg.norm(vector[i])
if norm > max_norm:
max_norm = norm
for i in range(0, num_of_vec):
vector[i].append(
math.sqrt(
math.pow(max_norm, 2) - math.pow(np.linalg.norm(vector[i]), 2)))
return vector
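# Note: convt_simple_lsh() appends sqrt(max_norm^2 - ||v||^2) to every item
# vector so that all augmented vectors share the same norm (max_norm). With
# equal norms, ranking by inner product against the original vectors is
# equivalent to a nearest-neighbour / cosine search over the augmented
# vectors, which is the usual reduction used for LSH-style retrieval.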
def item_vector():
"""
get item vectors
"""
args = parse_args()
# check argument
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
nce_w = parameters.get("nce_w")
nce_b = parameters.get("nce_b")
item_vector = convt_simple_lsh(get_item_vec_from_softmax(nce_w, nce_b))
for i in range(0, len(item_vector)):
itemid = nid_to_word[i]
print itemid + "\t" + ",".join(map(str, item_vector[i]))
if __name__ == "__main__":
item_vector()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
import cPickle
class DNNmodel(object):
"""
Deep Neural Networks for YouTube candidate generation
"""
def __init__(self,
dnn_layer_dims=None,
feature_dict=None,
item_freq=None,
is_infer=False):
"""
initialize model
@dnn_layer_dims: dimension of each hidden layer
@feature_dict: dictionary of encoded feature
@item_freq: list of item frequencies, used as the NCE negative sampling distribution
@is_infer: if infer mode
"""
self._dnn_layer_dims = dnn_layer_dims
self._feature_dict = feature_dict
self._item_freq = item_freq
self._is_infer = is_infer
# build model
self._build_input_layer()
self._build_embedding_layer()
self.model_cost = self._build_dnn_model()
def _build_input_layer(self):
"""
build input layer
"""
self._history_clicked_items = paddle.layer.data(
name="history_clicked_items",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_items'])))
self._history_clicked_categories = paddle.layer.data(
name="history_clicked_categories",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_categories'])))
self._history_clicked_tags = paddle.layer.data(
name="history_clicked_tags",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_tags'])))
self._user_id = paddle.layer.data(
name="user_id",
type=paddle.data_type.integer_value(
len(self._feature_dict['user_id'])))
self._province = paddle.layer.data(
name="province",
type=paddle.data_type.integer_value(
len(self._feature_dict['province'])))
self._city = paddle.layer.data(
name="city",
type=paddle.data_type.integer_value(
len(self._feature_dict['city'])))
self._phone = paddle.layer.data(
name="phone",
type=paddle.data_type.integer_value(
len(self._feature_dict['phone'])))
self._target_item = paddle.layer.data(
name="target_item",
type=paddle.data_type.integer_value(
len(self._feature_dict['history_clicked_items'])))
def _create_emb_attr(self, name):
"""
create embedding parameter
"""
return paddle.attr.Param(
name=name,
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=False)
def _build_embedding_layer(self):
"""
build embedding layer
"""
self._user_id_emb = paddle.layer.embedding(
input=self._user_id,
size=64,
param_attr=self._create_emb_attr('_proj_user_id'))
self._province_emb = paddle.layer.embedding(
input=self._province,
size=8,
param_attr=self._create_emb_attr('_proj_province'))
self._city_emb = paddle.layer.embedding(
input=self._city,
size=16,
param_attr=self._create_emb_attr('_proj_city'))
self._phone_emb = paddle.layer.embedding(
input=self._phone,
size=16,
param_attr=self._create_emb_attr('_proj_phone'))
self._history_clicked_items_emb = paddle.layer.embedding(
input=self._history_clicked_items,
size=64,
param_attr=self._create_emb_attr('_proj_history_clicked_items'))
self._history_clicked_categories_emb = paddle.layer.embedding(
input=self._history_clicked_categories,
size=8,
param_attr=self._create_emb_attr(
'_proj_history_clicked_categories'))
self._history_clicked_tags_emb = paddle.layer.embedding(
input=self._history_clicked_tags,
size=64,
param_attr=self._create_emb_attr('_proj_history_clicked_tags'))
def _build_dnn_model(self):
"""
build dnn model
"""
self._rnn_cell = paddle.networks.simple_lstm(
input=self._history_clicked_items_emb, size=64)
self._lstm_last = paddle.layer.pooling(
input=self._rnn_cell, pooling_type=paddle.pooling.Max())
self._avg_emb_cats = paddle.layer.pooling(
input=self._history_clicked_categories_emb,
pooling_type=paddle.pooling.Avg())
self._avg_emb_tags = paddle.layer.pooling(
input=self._history_clicked_tags_emb,
pooling_type=paddle.pooling.Avg())
self._fc_0 = paddle.layer.fc(
name="Relu1",
input=[
self._lstm_last, self._user_id_emb, self._province_emb,
self._city_emb, self._avg_emb_cats, self._avg_emb_tags,
self._phone_emb
],
size=self._dnn_layer_dims[0],
act=paddle.activation.Relu())
self._fc_1 = paddle.layer.fc(name="Relu2",
input=self._fc_0,
size=self._dnn_layer_dims[1],
act=paddle.activation.Relu())
if not self._is_infer:
return paddle.layer.nce(
input=self._fc_1,
label=self._target_item,
num_classes=len(self._feature_dict['history_clicked_items']),
param_attr=paddle.attr.Param(name="nce_w"),
bias_attr=paddle.attr.Param(name="nce_b"),
num_neg_samples=5,
neg_distribution=self._item_freq)
else:
self.prediction_layer = paddle.layer.mixed(
size=len(self._feature_dict['history_clicked_items']),
input=paddle.layer.trans_full_matrix_projection(
self._fc_1, param_attr=paddle.attr.Param(name="nce_w")),
act=paddle.activation.Softmax(),
bias_attr=paddle.attr.Param(name="nce_b"))
return self.prediction_layer, self._fc_1
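# Note: in infer mode the parameters learned with NCE ("nce_w", "nce_b") are
# reused to build a full softmax over all items through
# trans_full_matrix_projection, so training-time negative sampling and
# inference-time scoring share the same weights.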
if __name__ == "__main__":
# this is to test and debug the network topology definition.
# please set the hyper-parameters as needed.
item_freq_path = "./output/item_freq.pkl"
with open(item_freq_path) as f:
item_freq = cPickle.load(f)
feature_dict_path = "./output/feature_dict.pkl"
with open(feature_dict_path) as f:
feature_dict = cPickle.load(f)
a = DNNmodel(
dnn_layer_dims=[256, 31],
feature_dict=feature_dict,
item_freq=item_freq,
is_infer=False)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from utils import logger
from utils import TaskMode
class Reader(object):
"""
Reader
"""
def __init__(self, feature_dict=None, window_size=20):
"""
init
@window_size: window_size
"""
self._feature_dict = feature_dict
self._window_size = window_size
def train(self, path):
"""
load train set
@path: train set path
"""
logger.info("start train reader from %s" % path)
mode = TaskMode.create_train()
return self._reader(path, mode)
def test(self, path):
"""
load test set
@path: test set path
"""
logger.info("start test reader from %s" % path)
mode = TaskMode.create_test()
return self._reader(path, mode)
def infer(self, path):
"""
load infer set
@path: infer set path
"""
logger.info("start infer reader from %s" % path)
mode = TaskMode.create_infer()
return self._reader(path, mode)
def infer_user(self, user_list):
"""
load user set to infer
@user_list: user list
"""
return self._reader_user(user_list)
def _reader(self, path, mode):
"""
parse data set
"""
USER_ID_UNK = self._feature_dict['user_id'].get('<unk>')
PROVINCE_UNK = self._feature_dict['province'].get('<unk>')
CITY_UNK = self._feature_dict['city'].get('<unk>')
ITEM_UNK = self._feature_dict['history_clicked_items'].get('<unk>')
CATEGORY_UNK = self._feature_dict['history_clicked_categories'].get(
'<unk>')
TAG_UNK = self._feature_dict['history_clicked_tags'].get('<unk>')
PHONE_UNK = self._feature_dict['phone'].get('<unk>')
with open(path) as f:
for line in f:
fields = line.strip('\n').split('\t')
user_id = self._feature_dict['user_id'].get(fields[0],
USER_ID_UNK)
province = self._feature_dict['province'].get(fields[1],
PROVINCE_UNK)
city = self._feature_dict['city'].get(fields[2], CITY_UNK)
item_infos = fields[3]
phone = self._feature_dict['phone'].get(fields[4], PHONE_UNK)
history_clicked_items_all = []
history_clicked_tags_all = []
history_clicked_categories_all = []
for item_info in item_infos.split(';'):
item_info_array = item_info.split(':')
item = item_info_array[0]
item_encoded_id = self._feature_dict['history_clicked_items'].get(\
item, ITEM_UNK)
if item_encoded_id != ITEM_UNK:
history_clicked_items_all.append(item_encoded_id)
category = item_info_array[1]
history_clicked_categories_all.append(
self._feature_dict['history_clicked_categories'].get(\
category, CATEGORY_UNK))
tags = item_info_array[2]
tag_split = map(str, [self._feature_dict['history_clicked_tags'].get(\
tag, TAG_UNK) \
for tag in tags.strip().split("_")])
history_clicked_tags_all.append("_".join(tag_split))
if not mode.is_infer():
history_clicked_items_all.insert(0, 0)
history_clicked_tags_all.insert(0, "0")
history_clicked_categories_all.insert(0, 0)
for i in range(1, len(history_clicked_items_all)):
start = max(0, i - self._window_size)
history_clicked_items = history_clicked_items_all[start:
i]
history_clicked_categories = history_clicked_categories_all[
start:i]
history_clicked_tags_str = history_clicked_tags_all[
start:i]
history_clicked_tags = []
for tags_a in history_clicked_tags_str:
for tag in tags_a.split("_"):
history_clicked_tags.append(int(tag))
target_item = history_clicked_items_all[i]
yield user_id, province, city, \
history_clicked_items, history_clicked_categories, \
history_clicked_tags, phone, target_item
else:
history_clicked_items = history_clicked_items_all
history_clicked_categories = history_clicked_categories_all
history_clicked_tags_str = history_clicked_tags_all
history_clicked_tags = []
for tags_a in history_clicked_tags_str:
for tag in tags_a.split("_"):
history_clicked_tags.append(int(tag))
yield user_id, province, city, \
history_clicked_items, history_clicked_categories, \
history_clicked_tags, phone
def _reader_user(self, user_list):
"""
parse user list
"""
USER_ID_UNK = self._feature_dict['user_id'].get('<unk>')
for user in user_list:
user_id = self._feature_dict['user_id'].get(user, USER_ID_UNK)
yield user_id, 0, 0, [0], [0], [0], 0
if __name__ == "__main__":
# this is to test and debug the reader function
train_data = sys.argv[1]
feature_dict = sys.argv[2]
window_size = int(sys.argv[3])
import cPickle
with open(feature_dict) as f:
feature_dict = cPickle.load(f)
r = Reader(feature_dict, window_size)
for dat in r.train(train_data):
print dat
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--train_set_path',
type=str,
required=True,
help="path of the train set")
parser.add_argument(
'--test_set_path', type=str, required=True, help="path of the test set")
parser.add_argument(
'--model_output_dir',
type=str,
required=True,
help="directory to output")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--item_freq', type=str, required=True, help="path of item_freq.pkl ")
parser.add_argument(
'--window_size', type=int, default=20, help="window size(default: 20)")
parser.add_argument(
'--num_passes', type=int, default=1, help="number of passes to train")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def train():
"""
train
"""
args = parse_args()
# check argument
assert os.path.exists(
args.train_set_path), 'The train_set_path path does not exist.'
assert os.path.exists(
args.test_set_path), 'The test_set_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
assert os.path.exists(args.item_freq), 'The item_freq path does not exist.'
assert os.path.exists(
args.model_output_dir), 'The model_output_dir path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
with open(args.item_freq) as f:
item_freq = cPickle.load(f)
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6,
'target_item': 7
}
optimizer = paddle.optimizer.AdaGrad(
learning_rate=1e-1,
regularization=paddle.optimizer.L2Regularization(rate=1e-3))
cost = DNNmodel(
dnn_layer_dims=[256, 31],
feature_dict=feature_dict,
item_freq=item_freq,
is_infer=False).model_cost
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost, parameters, optimizer)
def event_handler(event):
"""
event handler
"""
if isinstance(event, paddle.event.EndIteration):
if event.batch_id and not event.batch_id % 10:
logger.info("Pass %d, Batch %d, Cost %f" %
(event.pass_id, event.batch_id, event.cost))
elif isinstance(event, paddle.event.EndPass):
save_path = os.path.join(args.model_output_dir,
"model_pass_%05d.tar.gz" % event.pass_id)
logger.info("Save model into %s ..." % save_path)
with gzip.open(save_path, "w") as f:
trainer.save_parameter_to_tar(f)
reader = Reader(feature_dict, args.window_size)
trainer.train(
paddle.batch(
paddle.reader.shuffle(
lambda: reader.train(args.train_set_path), buf_size=7000),
args.batch_size),
num_passes=args.num_passes,
feeding=feeding,
event_handler=event_handler)
if __name__ == "__main__":
train()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--infer_set_path',
type=str,
required=True,
help="path of the infer set")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def user_vector():
"""
get user vectors
"""
args = parse_args()
# check argument
assert os.path.exists(
args.infer_set_path), 'The infer_set_path path does not exist.'
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(reader.infer(args.infer_set_path)):
test_batch.append(item)
if len(test_batch) == args.batch_size:
get_a_batch_user_vector(inferer, test_batch)
test_batch = []
if len(test_batch):
get_a_batch_user_vector(inferer, test_batch)
def get_a_batch_user_vector(inferer, test_batch):
"""
input a batch of data and get user vectors
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(probs[1])):
# do simple lsh conversion
user_vector = [1.000]
for i in res[0]:
user_vector.append(i)
user_vector.append(0.000)
norm = np.linalg.norm(user_vector)
user_vector_norm = [str(_ / norm) for _ in user_vector]
print ",".join(user_vector_norm)
if __name__ == "__main__":
user_vector()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
logging.basicConfig()
logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)
class TaskMode(object):
"""
TaskMode
"""
TRAIN_MODE = 0
TEST_MODE = 1
INFER_MODE = 2
def __init__(self, mode):
"""
:param mode:
"""
self.mode = mode
def is_train(self):
"""
:return:
"""
return self.mode == self.TRAIN_MODE
def is_test(self):
"""
:return:
"""
return self.mode == self.TEST_MODE
def is_infer(self):
"""
:return:
"""
return self.mode == self.INFER_MODE
@staticmethod
def create_train():
"""
:return:
"""
return TaskMode(TaskMode.TRAIN_MODE)
@staticmethod
def create_test():
"""
:return:
"""
return TaskMode(TaskMode.TEST_MODE)
@staticmethod
def create_infer():
"""
:return:
"""
return TaskMode(TaskMode.INFER_MODE)