Commit ef038743 authored by Tao Luo

remove legacy python code

Parent 81da8549
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cPickle
import logging
import collections
import functools
import itertools
logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
" %(message)s")
class SequenceType(object):
NO_SEQUENCE = 0
SEQUENCE = 1
SUB_SEQUENCE = 2
@classmethod
def tostring(cls, value):
for k in cls.__dict__:
if not k.startswith('__'):
if getattr(cls, k) == value:
return cls.__name__ + '.' + k
return 'INVALID(' + str(value) + ')'
# TODO(yuyang18): Add string data type here.
class DataType(object):
Dense = 0
SparseNonValue = 1
SparseValue = 2
Index = 3
@classmethod
def tostring(cls, value):
for k in cls.__dict__:
if not k.startswith('__'):
if getattr(cls, k) == value:
return cls.__name__ + '.' + k
return 'INVALID(' + str(value) + ')'
class CacheType(object):
NO_CACHE = 0 # No cache at all
# On the first pass, read data from Python and store it in memory. Read from
# memory during subsequent passes.
CACHE_PASS_IN_MEM = 1
class InputType(object):
"""
InputType is the base class for paddle input types.
.. note::
this is a base class and should never be used directly by the user.
:param dim: dimension of the input. If the input is an integer, it means its
value range. Otherwise, it means the size of the layer.
:type dim: int
:param seq_type: sequence type of input. 0 means it is not a sequence. 1
means it is a variable length sequence. 2 means it is a
nested sequence.
:type seq_type: int
:param type: data type of input.
:type type: int
"""
__slots__ = ['dim', 'seq_type', 'type']
def __init__(self, dim, seq_type, tp):
self.dim = dim
self.seq_type = seq_type
self.type = tp
def __repr__(self):
"""
Return a human readable representation like 'InputType(dim=25921,
seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)'
"""
repr_str = type(self).__name__
repr_str += '('
serialize_func_map = {
'dim': repr,
'seq_type': SequenceType.tostring,
'type': DataType.tostring
}
for idx, k in enumerate(self.__slots__):
if idx != 0:
repr_str += ', '
repr_str += (
k + '=' + serialize_func_map.get(k, repr)(getattr(self, k)))
repr_str += ')'
return repr_str
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense Array. It means the input feature is a dense array of floats.
For example, if the input is an image with 28*28 pixels, the input of
the Paddle neural network could be a dense vector with dimension 784 or a
numpy array with shape (28, 28).
For the 2-D convolution operation, each sample in a mini-batch must currently
have the same size in PaddlePaddle, but the feature dimension may vary across
mini-batches. In the variable-dimension case, the param dim is not used; the
data reader must yield a numpy array, and the data feeder will set the data
shape correctly.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.Dense)
def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse binary vector. It means the input feature is a sparse vector in which
every element is either zero or one.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseNonValue)
def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse vector. It means the input feature is a sparse vector. Most of the
elements in this vector are zero; the others can be any float value.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
"""
Data type of an integer.
:param seq_type: sequence type of this input.
:type seq_type: int
:param value_range: range of this integer.
:type value_range: int
:return: An input type object
:rtype: InputType
"""
return InputType(value_range, seq_type, DataType.Index)
dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot
sparse_float_vector = sparse_value_slot
integer_value = index_slot
# dense_array can be used for variable-length input feature.
# Each feature is not a vector, but a multi-dimensional array.
dense_array = dense_slot
def dense_vector_sequence(dim):
"""
Data type of a sequence of dense vectors.
:param dim: dimension of dense vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
def dense_vector_sub_sequence(dim):
return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_binary_vector_sequence(dim):
"""
Data type of a sequence of sparse binary vectors, in which every element is
either zero or one.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_binary_vector_sub_sequence(dim):
return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_float_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which most elements are zero
and the others can be any float value.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_float_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_float_vector_sub_sequence(dim):
return sparse_float_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(value_range):
"""
Data type of a sequence of integers.
:param value_range: range of each element.
:type value_range: int
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
def integer_value_sub_sequence(dim):
return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE)
integer_sequence = integer_value_sequence
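# A minimal usage sketch of the helpers above (the dimensions and the
# vocabulary size here are illustrative only): declare input types for an
# image-classification provider and for a word-id sequence provider.
def _input_type_examples():
    image_and_label = [dense_vector(784), integer_value(10)]
    word_ids = integer_value_sequence(10000)
    # repr() of an InputType shows dim, seq_type and type, e.g.
    # InputType(dim=784, seq_type=SequenceType.NO_SEQUENCE, type=DataType.Dense)
    return image_and_label, word_ids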
class SingleSlotWrapper(object):
def __init__(self, generator):
self.generator = generator
def __call__(self, obj, filename):
for item in self.generator(obj, filename):
if isinstance(item, dict):
yield item
else:
yield [item]
class InputOrderWrapper(object):
def __init__(self, generator, input_order):
self.generator = generator
self.input_order = input_order
def __call__(self, obj, filename):
for item in self.generator(obj, filename):
if isinstance(item, dict):
yield [
item.get(input_name, None)
for input_name in self.input_order
]
else:
yield item
class CheckWrapper(object):
def __init__(self, generator, input_types, check_fail_continue, logger):
self.generator = generator
self.input_types = input_types
self.check_fail_continue = check_fail_continue
self.logger = logger
def __call__(self, obj, filename):
for items in self.generator(obj, filename):
try:
assert len(items) == len(self.input_types)
assert len(filter(lambda x: x is None, items)) == 0
for item, input_type in itertools.izip(items, self.input_types):
callback = functools.partial(CheckWrapper.loop_callback,
input_type)
for _ in xrange(input_type.seq_type):
callback = functools.partial(CheckWrapper.loop_check,
callback)
callback(item)
yield items
except AssertionError as e:
self.logger.warning(
"Item (%s) does not fit the input type, with error %s" %
(repr(item), repr(e)))
if self.check_fail_continue:
continue
else:
raise
@staticmethod
def loop_callback(input_type, each):
assert isinstance(input_type, InputType)
if input_type.type == DataType.Dense:
assert isinstance(each, collections.Sequence)
for d in each:
assert isinstance(d, float)
assert len(each) == input_type.dim
elif input_type.type == DataType.Index:
assert isinstance(each, int)
assert each < input_type.dim
elif input_type.type == DataType.SparseNonValue \
or input_type.type == DataType.SparseValue:
assert isinstance(each, collections.Sequence)
sparse_id = set()
for k in each:
if input_type.type == DataType.SparseValue:
k, v = k
assert isinstance(v, float)
assert isinstance(k, int)
assert k < input_type.dim
sparse_id.add(k)
assert len(sparse_id) == len(each)
else:
raise RuntimeError("Unsupported input type")
@staticmethod
def loop_check(callback, item):
for each in item:
callback(each)
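# Items that satisfy CheckWrapper.loop_callback for each DataType, assuming a
# slot with dim == 4 (an illustrative sketch of the assertions above).
def _check_wrapper_item_examples():
    dense_item = [0.1, 0.2, 0.3, 0.4]    # DataType.Dense: exactly `dim` floats
    index_item = 2                       # DataType.Index: one int in [0, dim)
    sparse_ids = [0, 3]                  # DataType.SparseNonValue: distinct ids in [0, dim)
    sparse_vals = [(0, 0.5), (3, 1.25)]  # DataType.SparseValue: (id, float) pairs, distinct ids
    return dense_item, index_item, sparse_ids, sparse_vals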
class CheckInputTypeWrapper(object):
def __init__(self, generator, input_types, logger):
self.generator = generator
self.input_types = input_types
self.logger = logger
def __call__(self, obj, filename):
for items in self.generator(obj, filename):
try:
# dict type is required for input_types when item is dict type
assert (isinstance(items, dict) and \
not isinstance(self.input_types, dict))==False
yield items
except AssertionError as e:
self.logger.error(
"%s type is required for input type but got %s" %
(repr(type(items)), repr(type(self.input_types))))
raise
def provider(input_types=None,
should_shuffle=None,
pool_size=-1,
min_pool_size=-1,
can_over_batch_size=True,
calc_batch_size=None,
cache=CacheType.NO_CACHE,
check=False,
check_fail_continue=False,
init_hook=None,
**outter_kwargs):
"""
Provider decorator. Use it to turn a function into a PyDataProvider2 object.
In this function, the user only needs to yield each sample from some
train/test file.
The basic usage is:
.. code-block:: python
@provider(some data provider config here...)
def process(settings, file_name):
while not at end of file_name:
sample = readOneSampleFromFile(file_name)
yield sample.
The configuration of the data provider should be set up by\:
:param input_types: Specify the input types; it can also be set in init_hook.
It could be a list of InputType objects. For example,
input_types=[dense_vector(9), integer_value(2)]. Alternatively, the
user can set a dict of InputType objects, whose keys are the
data_layers' names. For example, input_types=\
{'img': img_features, 'label': label}. When using a dict of
InputType, the user should yield a dict of feature values, whose
keys are also the data_layers' names.
:type input_types: list|tuple|dict
:param should_shuffle: True if data should be shuffled. Passing None means
shuffle during training and do not shuffle during testing.
:type should_shuffle: bool
:param pool_size: Maximum number of samples in the data pool.
:type pool_size: int
:param min_pool_size: Minimum number of samples in the data pool. PaddlePaddle
randomly picks samples from the pool, so min_pool_size
affects how well the data is randomized.
:type min_pool_size: int
:param can_over_batch_size: True if paddle can return a mini-batch larger
than the batch size in settings. It is useful when
customizing each sample's batch_size.
It is very dangerous to set it to False and use
calc_batch_size together. Default is True.
:type can_over_batch_size: bool
:param calc_batch_size: a method to calculate each sample's batch size.
By default each sample's batch size is 1, but you
can customize it.
:type calc_batch_size: callable
:param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE
:type cache: int
:param init_hook: Initialization hook. Useful when the data provider needs to
load some external data, such as a dictionary. The parameters are
(settings, file_list, \*\*kwargs).
- settings. The global settings object. The user can set
settings.input_types here.
- file_list. All file names passed to the data provider.
- is_train. Whether this data provider is used for training or not.
- kwargs. Other keyword arguments passed from
trainer_config's args parameter.
:type init_hook: callable
:param check: Check whether the yielded data format matches input_types.
Enabling this will slow down the data providing process, but it is
very useful for debugging. Default is disabled.
:type check: bool
:param check_fail_continue: Whether to continue training when a check fails.
If True, the wrongly formatted data is simply
dropped. Has no effect when check is False.
:type check_fail_continue: bool
"""
def __wrapper__(generator):
class DataProvider(object):
def __init__(self, file_list, **kwargs):
self.logger = logging.getLogger("")
self.logger.setLevel(logging.INFO)
self.input_types = None
self.should_shuffle = should_shuffle
true_table = [1, 't', 'true', 'on']
false_table = [0, 'f', 'false', 'off']
if not isinstance(self.should_shuffle, bool) and \
self.should_shuffle is not None:
if isinstance(self.should_shuffle, basestring):
self.should_shuffle = self.should_shuffle.lower()
if self.should_shuffle in true_table:
self.should_shuffle = True
elif self.should_shuffle in false_table:
self.should_shuffle = False
else:
self.logger.warning(
"Could not recognize should_shuffle (%s); "
"using the default value of should_shuffle."
" Please set should_shuffle to a bool value or "
"something in %s" %
(repr(self.should_shuffle),
repr(true_table + false_table)))
self.should_shuffle = None
self.pool_size = pool_size
self.can_over_batch_size = can_over_batch_size
self.calc_batch_size = calc_batch_size
self.file_list = file_list
self.generator = generator
self.cache = cache
self.min_pool_size = min_pool_size
self.input_order = kwargs['input_order']
self.check = check
if init_hook is not None:
init_hook(self, file_list=file_list, **kwargs)
if 'slots' in outter_kwargs:
self.logger.warning('setting slots value is deprecated, '
'please use input_types instead.')
self.slots = outter_kwargs['slots']
if input_types is not None:
self.slots = input_types
if self.input_types is not None:
self.slots = self.input_types
assert self.slots is not None, \
"Data Provider's input_types must be set"
assert self.generator is not None
use_dynamic_order = False
if isinstance(self.slots, dict): # reorder input_types
self.slots = [self.slots[ipt] for ipt in self.input_order]
use_dynamic_order = True
if len(self.slots) == 1:
self.generator = SingleSlotWrapper(self.generator)
if use_dynamic_order:
self.generator = InputOrderWrapper(self.generator,
self.input_order)
else:
self.generator = CheckInputTypeWrapper(
self.generator, self.slots, self.logger)
if self.check:
self.generator = CheckWrapper(self.generator, self.slots,
check_fail_continue,
self.logger)
return DataProvider
return __wrapper__
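# A hedged end-to-end sketch of the decorator above, as it would appear in a
# user's data_provider.py. The tab-separated "label<TAB>pixels" file format
# and the slot names 'pixel'/'label' are illustrative only.
def _provider_usage_example():
    @provider(input_types={'pixel': dense_vector(784),
                           'label': integer_value(10)})
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split('\t')
                yield {
                    'pixel': [float(x) for x in pixels.split()],
                    'label': int(label),
                }
    return process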
def deserialize_args(args):
"""
Internal use only.
:param args:
:return:
"""
return cPickle.loads(args)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This diff is collapsed.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.proto.DataConfig_pb2 import DataConfig
g_config = None
def SimpleData(files=None,
feat_dim=None,
context_len=None,
buffer_capacity=None):
data_config = DataConfig()
data_config.type = 'simple'
data_config.files = files
data_config.feat_dim = feat_dim
if context_len is not None:
data_config.context_len = context_len
if buffer_capacity:
data_config.buffer_capacity = buffer_capacity
return data_config
def get_config_funcs(trainer_config):
global g_config
g_config = trainer_config
return dict(SimpleData=SimpleData)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# recurrent_units.py
# Version 2.0
#
# Some recurrent units that can be used in a recurrent layer group.
# To use these units, import this module in your config_file:
# import trainer.recurrent_units
#
# The modules in this file are DEPRECATED.
# If you would like to use lstm/gru
# please use the functions defined in paddle.trainer_config_helpers.
from paddle.trainer.config_parser import *
# Long short-term memory; can be used in a recurrent machine.
# *inputs* must be a list of Projections, for example:
#   inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names; if the *para_prefix* of
# two LstmRecurrentUnits is the same, they share the same parameters.
# *out_memory* can be defined outside if it is used outside.
def LstmRecurrentUnit(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
Layer(
name=name + "_" + "input_recurrent",
type="mixed",
size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs=inputs + [
FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], )
LstmStepLayer(
name=name,
size=size,
bias=Bias(parameter_name=para_prefix + "_check.b"),
inputs=[name + "_" + "input_recurrent", state_memory],
active_type=active_type,
active_gate_type=gate_active_type,
active_state_type=state_active_type, )
GetOutputLayer(
name=name + "_" + "state",
size=size,
inputs=Input(
name, input_layer_argument="state"), )
def LstmRecurrentUnitNaive(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
state_memory = Memory(name=name + "_" + "state", size=size)
Layer(
name=name + "_" + "input_recurrent",
type="mixed",
size=size * 4, #(input_s, input_gate, forget_gate, output_gate)
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_input_recurrent.b"),
inputs=inputs + [
FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_input_recurrent.w"),
], )
ExpressionLayer(
name=name + "_" + "input_s",
size=size,
active_type=active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=0)
], )
ExpressionLayer(
name=name + "_" + "input_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size), DotMulProjection(
state_memory, parameter_name=para_prefix + "_input_check.w")
], )
ExpressionLayer(
name=name + "_" + "forget_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size * 2),
DotMulProjection(
state_memory, parameter_name=para_prefix + "_forget_check.w")
], )
ExpressionLayer(
name=name + "_" + "state",
inputs=[
DotMulOperator([name + "_" + "input_s", name + "_" + "input_gate"]),
DotMulOperator([state_memory, name + "_" + "forget_gate"]),
], )
ExpressionLayer(
name=name + "_" + "output_gate",
active_type=gate_active_type,
inputs=[
IdentityOffsetProjection(
name + "_" + "input_recurrent", offset=size * 3),
DotMulProjection(
name + "_" + "state",
parameter_name=para_prefix + "_output_check.w")
], )
ExpressionLayer(
name=name + "_" + "state_atv",
active_type=state_active_type,
inputs=IdentityProjection(name + "_" + "state"), )
ExpressionLayer(
name=name,
inputs=DotMulOperator(
[name + "_" + "state_atv", name + "_" + "output_gate"]), )
# Like LstmRecurrentUnit, but it is a layer group.
# It is equivalent to LstmLayer.
def LstmRecurrentLayerGroup(name,
size,
active_type,
state_active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 4,
active_type="",
bias=False,
inputs=inputs, )
RecurrentLayerGroupBegin(
name + "_layer_group",
in_links=[input_layer_name],
out_links=[name],
seq_reversed=seq_reversed)
LstmRecurrentUnit(
name=name,
size=size,
active_type=active_type,
state_active_type=state_active_type,
gate_active_type=gate_active_type,
inputs=[IdentityProjection(input_layer_name)],
para_prefix=para_prefix,
error_clipping_threshold=error_clipping_threshold, )
RecurrentLayerGroupEnd(name + "_layer_group")
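def _lstm_layer_group_example():
    # A hedged sketch of a typical invocation from a trainer config. The "emb"
    # input layer name and the activation-type strings are illustrative only.
    LstmRecurrentLayerGroup(
        name="lstm",
        size=256,
        active_type="tanh",
        state_active_type="tanh",
        gate_active_type="sigmoid",
        inputs=[FullMatrixProjection("emb")])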
# Gated recurrent unit; can be used in a recurrent machine.
# *inputs* should be a list of Projections, for example:
#   inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names; if the *para_prefix* of
# two GatedRecurrentUnits is the same, they share the same parameters.
# *out_memory* can be defined outside if it is used outside.
def GatedRecurrentUnit(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs
else:
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
GruStepLayer(
name=name,
size=size,
bias=Bias(parameter_name=para_prefix + "_gate.b"),
inputs=[
input_layer_name, Input(
out_memory, parameter_name=para_prefix + "_gate.w")
],
active_type=active_type,
active_gate_type=gate_active_type, )
def GatedRecurrentUnitNaive(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
out_memory=None):
if type_of(inputs) == str: #only used by GatedRecurrentLayerGroup
input_layer_name = inputs
else:
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
if para_prefix is None:
para_prefix = name
if out_memory is None:
out_memory = Memory(name=name, size=size)
Layer(
name=name + "_" + "update_gate",
type="mixed",
size=size,
active_type=gate_active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_update_gate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=0), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_update_gate.w")
], )
Layer(
name=name + "_" + "reset_gate",
type="mixed",
size=size,
active_type=gate_active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_reset_gate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=size), FullMatrixProjection(
out_memory, parameter_name=para_prefix + "_reset_gate.w")
], )
ExpressionLayer(
name=name + "_" + "reset_output",
inputs=DotMulOperator([out_memory, name + "_" + "reset_gate"]), )
Layer(
name=name + "_" + "output_candidate",
type="mixed",
size=size,
active_type=active_type,
error_clipping_threshold=error_clipping_threshold,
bias=Bias(
initial_std=0, parameter_name=para_prefix + "_output_candidate.b"),
inputs=[
IdentityOffsetProjection(
input_layer_name, offset=size * 2), FullMatrixProjection(
name + "_" + "reset_output",
parameter_name=para_prefix + "_output_candidate.w")
], )
ExpressionLayer( #element-wise interpolation
name=name,
inputs=[
IdentityProjection(out_memory),
DotMulOperator(
[out_memory, name + "_" + "update_gate"], scale=-1.0),
DotMulOperator(
[name + "_" + "output_candidate", name + "_" + "update_gate"]),
], )
# Like GatedRecurrentUnit, but it is a layer group.
# It is equivalent to GatedRecurrentLayer.
def GatedRecurrentLayerGroup(name,
size,
active_type,
gate_active_type,
inputs,
para_prefix=None,
error_clipping_threshold=0,
seq_reversed=False):
input_layer_name = name + "_" + "transform_input"
Layer(
name=input_layer_name,
type="mixed",
size=size * 3,
active_type="",
bias=False,
inputs=inputs, )
RecurrentLayerGroupBegin(
name + "_layer_group",
in_links=[input_layer_name],
out_links=[name],
seq_reversed=seq_reversed)
GatedRecurrentUnit(
name=name,
size=size,
active_type=active_type,
gate_active_type=gate_active_type,
inputs=input_layer_name, #transform outside
para_prefix=para_prefix,
error_clipping_threshold=error_clipping_threshold, )
RecurrentLayerGroupEnd(name + "_layer_group")
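def _gru_layer_group_example():
    # A hedged sketch mirroring the LSTM example above: a typical invocation
    # of GatedRecurrentLayerGroup from a trainer config (names illustrative).
    GatedRecurrentLayerGroup(
        name="gru",
        size=256,
        active_type="tanh",
        gate_active_type="sigmoid",
        inputs=[FullMatrixProjection("emb")],
        seq_reversed=False)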
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from activations import *
from data_sources import *
from poolings import *
from evaluators import *
from layers import *
from networks import *
from optimizers import *
from attrs import *
from config_parser_utils import *
# This will enable operator overload for LayerOutput
import layer_math
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = [
"TanhActivation", "SigmoidActivation", "SoftmaxActivation",
"IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
"STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation",
"LogActivation", "SqrtActivation", "ReciprocalActivation",
"SoftSignActivation"
]
class BaseActivation(object):
"""
A marker class for activations.
Each activation inherits from BaseActivation, which has two parameters.
:param name: activation name in paddle config.
:type name: basestring
:param support_hppl: True if supported by HPPL, a library used by paddle
internally. Currently, the lstm layer can only use
activations supported by HPPL.
:type support_hppl: bool
"""
def __init__(self, name, support_hppl):
self.name = name
self.support_hppl = support_hppl
def __repr__(self):
return self.name
class TanhActivation(BaseActivation):
"""
Tanh activation.
.. math::
f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
"""
def __init__(self):
BaseActivation.__init__(self, 'tanh', True)
class SigmoidActivation(BaseActivation):
"""
Sigmoid activation.
.. math::
f(z) = \\frac{1}{1+exp(-z)}
"""
def __init__(self):
BaseActivation.__init__(self, 'sigmoid', True)
class SoftmaxActivation(BaseActivation):
"""
Softmax activation for a simple (non-sequence) input
.. math::
P(y=j|x) = \\frac{e^{x_j}} {\\sum^K_{k=1} e^{x_k} }
"""
def __init__(self):
BaseActivation.__init__(self, 'softmax', False)
class SequenceSoftmaxActivation(BaseActivation):
"""
Softmax activation for one sequence. The input feature must be a sequence,
and its feature dimension must be 1.
.. code:: python
result = softmax([each_feature_vector[0] for each_feature_vector in input_feature])
for i, each_time_step_output in enumerate(output):
each_time_step_output = result[i]
"""
def __init__(self):
BaseActivation.__init__(self, 'sequence_softmax', False)
class IdentityActivation(BaseActivation):
"""
Identity Activation.
It does nothing to the output in both the forward and backward passes.
"""
def __init__(self):
BaseActivation.__init__(self, '', False)
LinearActivation = IdentityActivation
class ReluActivation(BaseActivation):
"""
Relu activation.
Forward: :math:`y = max(0, z)`
Derivative:
.. math::
1 &\\quad if z > 0 \\\\
0 &\\quad \\mathrm{otherwise}
"""
def __init__(self):
BaseActivation.__init__(self, 'relu', True)
class BReluActivation(BaseActivation):
"""
BRelu Activation.
Forward: :math:`y = min(24, max(0, z))`
Derivative:
.. math::
1 &\\quad if 0 < z < 24 \\\\
0 &\\quad \\mathrm{otherwise}
"""
def __init__(self):
BaseActivation.__init__(self, 'brelu', False)
class SoftReluActivation(BaseActivation):
"""
SoftRelu Activation.
"""
def __init__(self):
BaseActivation.__init__(self, 'softrelu', False)
class STanhActivation(BaseActivation):
"""
Scaled Tanh Activation.
.. math::
f(z) = 1.7159 * tanh(2/3*z)
"""
def __init__(self):
BaseActivation.__init__(self, 'stanh', False)
class AbsActivation(BaseActivation):
"""
Abs Activation.
Forward: :math:`f(z) = abs(z)`
Derivative:
.. math::
1 &\\quad if \\quad z > 0 \\\\
-1 &\\quad if \\quad z < 0 \\\\
0 &\\quad if \\quad z = 0
"""
def __init__(self):
BaseActivation.__init__(self, 'abs', False)
class SquareActivation(BaseActivation):
"""
Square Activation.
.. math::
f(z) = z^2.
"""
def __init__(self):
BaseActivation.__init__(self, 'square', False)
class ExpActivation(BaseActivation):
"""
Exponential Activation.
.. math::
f(z) = e^z.
"""
def __init__(self):
BaseActivation.__init__(self, 'exponential', False)
class LogActivation(BaseActivation):
"""
Logarithm Activation.
.. math::
f(z) = log(z)
"""
def __init__(self):
BaseActivation.__init__(self, 'log', False)
class SqrtActivation(BaseActivation):
"""
Square Root Activation.
.. math::
f(z) = sqrt(z)
"""
def __init__(self):
BaseActivation.__init__(self, 'sqrt', False)
class ReciprocalActivation(BaseActivation):
"""
Reciprocal Activation.
.. math::
f(z)=\\frac{1}{z}
"""
def __init__(self):
BaseActivation.__init__(self, 'reciprocal', False)
class SoftSignActivation(BaseActivation):
"""
SoftSign Activation.
.. math::
f(z)=\\frac{z}{1 + |z|}
"""
def __init__(self):
BaseActivation.__init__(self, 'softsign', False)
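# The classes above are thin markers: each carries only the activation name
# resolved by the core, plus the support_hppl flag. A hedged sketch of wrapping
# one more activation name (the 'cube' name here is hypothetical):
def _custom_activation_example():
    class CubeActivation(BaseActivation):
        def __init__(self):
            BaseActivation.__init__(self, 'cube', False)
    return repr(CubeActivation())  # -> 'cube', since __repr__ returns the name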
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import *
__all__ = [
'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
'ExtraLayerAttribute'
]
def convert_and_compare(x, Type):
"""
Convert x to be the same type as Type and then convert back to
check whether there is a loss of information
:param x: object to be checked
:param Type: target type to check x over
"""
return type(x)(Type(x)) == x
def is_compatible_with(x, Type):
"""
Check if x has a type compatible with Type
:param x: object to be checked
:param Type: target type to check x over
"""
if type(x) == Type:
return True
try:
if float == Type or int == Type:
# avoid types that can be converted to float/int but are not very
# meaningful and could potentially lead to errors,
# e.g., str and bool values should not be used to initialize float/int variables
if not isinstance(x, str) and not isinstance(x, bool):
return convert_and_compare(x, Type)
elif bool == Type:
# should not use string type to initialize bool variable
if not isinstance(x, str):
return convert_and_compare(x, Type)
else:
return False
except:
return False
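def _type_compatibility_examples():
    # A few concrete cases of the round-trip check above (illustrative only).
    assert convert_and_compare(3.0, int)          # int(3.0) -> 3 -> 3.0, no loss
    assert not convert_and_compare(3.5, int)      # int(3.5) -> 3 -> 3.0 != 3.5
    assert is_compatible_with(1, float)           # an int can initialize a float attr
    assert not is_compatible_with('0.1', float)   # strings are rejected
    assert not is_compatible_with(True, float)    # bools are rejected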
class HookAttribute(object):
"""
Hook Attribute object. As a member of the ParameterAttribute class, a hook is an auxiliary operation that occurs
during the training process of a layer with parameters, such as an img_conv layer or an fc layer.
:param type: Hook type, currently supported types:
'pruning' : the user specifies a sparsity_ratio before training starts, and the
network will prune the parameters based on the sparsity_ratio.
e.g. a Hook object can be defined as hk = HookAttribute('pruning', 0.6)
and used as paddle.layer.img_conv(input=img, filter_size=3,
num_channels=3, num_filters=64,
param_attr=ParameterAttribute(update_hooks=hk))
The pruning details can be found at https://arxiv.org/pdf/1506.02626.pdf
:type type: string
:param sparsity_ratio: Must be specified if the hook type is 'pruning';
it represents the ratio of elements in the parameter to be set to zero.
:type sparsity_ratio: float or None
"""
def __init__(self, type, sparsity_ratio=None):
self.type = type
self.sparsity_ratio = sparsity_ratio
if self.sparsity_ratio is not None:
assert is_compatible_with(
self.sparsity_ratio,
float), 'sparsity_ratio must be float type'
assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, 'sparsity_ratio must be a float in [0, 1]'
def __call__(self):
return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)
class ParameterAttribute(object):
"""
Parameter Attributes object. To fine-tune the network training process, the
user can set attributes to control training details, such as the l1/l2 rate,
the learning rate, and how to initialize the parameter.
NOTE: IT IS A HIGH LEVEL USER INTERFACE.
:param is_static: True if this parameter will be fixed while training.
:type is_static: bool
:param initial_std: Gaussian random initialization standard deviation.
None if not using Gaussian random initialization.
:type initial_std: float or None
:param initial_mean: Gaussian random initialization mean.
None if not using Gaussian random initialization.
:type initial_mean: float or None
:param initial_max: Uniform initialization max value.
:type initial_max: float or None
:param initial_min: Uniform initialization min value.
:type initial_min: float or None
:param l1_rate: the l1 regularization factor
:type l1_rate: float or None
:param l2_rate: the l2 regularization factor
:type l2_rate: float or None
:param learning_rate: The parameter learning rate. None means 1.
The learning rate when optimize is LEARNING_RATE =
GLOBAL_LEARNING_RATE * PARAMETER_LEARNING_RATE
* SCHEDULER_FACTOR.
:type learning_rate: float or None
:param momentum: The parameter momentum. None means use global value.
:type momentum: float or None
:param gradient_clipping_threshold: gradient clipping threshold. If the
gradient value is larger than this
threshold, it will be clipped.
:type gradient_clipping_threshold: float
:param sparse_update: Enable sparse update for this parameter. It will
enable both local and remote sparse update.
:type sparse_update: bool
:param update_hooks: A HookAttribute object.
:type update_hooks: HookAttribute
:param initializer: If not None, it should be a callable object which accepts
a parameter name and returns numpy array for the initial
value of the parameter
:type initializer: callable object
"""
def __init__(self,
name=None,
is_static=False,
initial_std=None,
initial_mean=None,
initial_max=None,
initial_min=None,
l1_rate=None,
l2_rate=None,
learning_rate=None,
momentum=None,
gradient_clipping_threshold=None,
sparse_update=False,
update_hooks=None,
initializer=None):
self.attr = {}
if is_static:
self.attr['is_static'] = True
if initial_std is None and initial_mean is None and initial_max \
is None and initial_min is None:
self.attr['initial_smart'] = True
elif is_compatible_with(initial_std, float) or \
is_compatible_with(initial_mean, float):
if initial_std is not None:
self.attr['initial_std'] = initial_std
if initial_mean is not None:
self.attr['initial_mean'] = initial_mean
self.attr['initial_strategy'] = 0 # Gauss Random
elif is_compatible_with(initial_max, float) and \
is_compatible_with(initial_min, float):
assert initial_min < initial_max
initial_mean = (initial_max + initial_min) / 2
initial_std = initial_mean - initial_min
self.attr['initial_mean'] = initial_mean
self.attr['initial_std'] = initial_std
self.attr['initial_strategy'] = 1 # Uniform Random
else:
raise RuntimeError("Unexpected branch.")
if not is_static and is_compatible_with(l1_rate, float):
self.attr['decay_rate_l1'] = l1_rate
if not is_static and is_compatible_with(l2_rate, float):
self.attr['decay_rate'] = l2_rate
if not is_static and is_compatible_with(learning_rate, float):
self.attr['learning_rate'] = learning_rate
if not is_static and is_compatible_with(momentum, float):
self.attr['momentum'] = momentum
if name is not None:
self.attr['parameter_name'] = name
if sparse_update:
self.attr['sparse_update'] = True
self.attr['sparse_remote_update'] = True
if gradient_clipping_threshold is not None and \
is_compatible_with(gradient_clipping_threshold, float):
self.attr['gradient_clipping_threshold'] = \
gradient_clipping_threshold
if initializer is not None:
self.attr['initializer'] = initializer
if update_hooks:
self.attr['update_hooks'] = update_hooks
def set_default_parameter_name(self, name):
"""
Set the default parameter name. If the parameter name is not set, the
default parameter name will be used.
:param name: default parameter name.
:type name: basestring
"""
if 'parameter_name' not in self.attr:
self.attr['parameter_name'] = name
@staticmethod
def to_bias(bias_attr):
if isinstance(bias_attr, ParameterAttribute):
return Bias(**bias_attr.attr)
else:
return False
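def _parameter_attribute_examples():
    # A hedged illustration of how the constructor above fills self.attr.
    gauss = ParameterAttribute(initial_mean=0.0, initial_std=0.02, l2_rate=8e-4)
    # gauss.attr == {'initial_mean': 0.0, 'initial_std': 0.02,
    #                'initial_strategy': 0, 'decay_rate': 0.0008}
    uniform = ParameterAttribute(initial_min=-0.1, initial_max=0.1)
    # uniform.attr == {'initial_mean': 0.0, 'initial_std': 0.1,
    #                  'initial_strategy': 1}
    return gauss.attr, uniform.attr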
class ExtraLayerAttribute(object):
"""
Some high-level layer attributes config. You can set all attributes here,
but some layers do not support all attributes. If you set an attribute on a
layer that does not support it, paddle will print an error and dump core.
:param error_clipping_threshold: Error clipping threshold.
:type error_clipping_threshold: float
:param drop_rate: Dropout rate. Dropout will create a mask on the layer output.
The dropout rate is the zero rate of this mask. For the
details of what dropout is, please refer to `JMLRdropout
<https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
>`_.
:type drop_rate: float
:param device: device ID of the layer. device=-1 uses the CPU; device>=0 uses the GPU.
For the details of device allocation in parallel_nn, please refer to `use_case
<https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2
/howto/cmd_parameter/use_case_en.md#case-2-specify-layers-in
-different-devices>`_.
:type device: int
"""
def __init__(self,
error_clipping_threshold=None,
drop_rate=None,
device=None):
self.attr = dict()
if error_clipping_threshold is not None:
error_clipping_threshold = float(error_clipping_threshold)
if error_clipping_threshold < 0:
raise ValueError("Error clipping threshold must not be negative")
self.attr['error_clipping_threshold'] = error_clipping_threshold
if drop_rate is not None:
drop_rate = float(drop_rate)
if drop_rate < 0:
raise ValueError("Dropout rate must not be negative")
self.attr["drop_rate"] = drop_rate
if isinstance(device, int):
self.attr["device"] = device
def check(self, layer_name):
for key in self.attr:
if not hasattr(self, 'can_%s' % key) or \
not getattr(self, 'can_%s' % key):
raise NotImplementedError("Layer %s does not support %s" %
(layer_name, key))
@staticmethod
def to_kwargs(attr):
if attr is None:
return dict()
else:
return attr.attr
HookAttr = HookAttribute
ParamAttr = ParameterAttribute
ExtraAttr = ExtraLayerAttribute
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import paddle.trainer.config_parser as config_parser
from paddle.proto.TrainerConfig_pb2 import OptimizationConfig
'''
This file is a wrapper of the formal config_parser. The main idea of this file is to
separate different config logic into different functions, such as network configuration
and optimizer configuration.
'''
__all__ = [
"parse_trainer_config", "parse_network_config", "parse_optimizer_config",
"reset_parser"
]
def parse_trainer_config(trainer_conf, config_arg_str):
return config_parser.parse_config(trainer_conf, config_arg_str)
def parse_network_config(network_conf, config_arg_str=''):
config = config_parser.parse_config(network_conf, config_arg_str)
return config.model_config
def parse_optimizer_config(optimizer_conf, config_arg_str=''):
config_parser.settings = copy.deepcopy(config_parser.DEFAULT_SETTING)
optimizer_conf()
opt_config = OptimizationConfig()
for k, v in config_parser.settings.iteritems():
if v is None:
continue
opt_config.__setattr__(k, v)
return opt_config
def reset_parser():
config_parser.begin_parse()
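def _parse_network_example():
    # A hedged sketch of the wrappers above: build a small network config
    # function with trainer_config_helpers and turn it into a ModelConfig
    # proto. The layer helpers are assumed to be importable as shown.
    from paddle.trainer_config_helpers import data_layer, fc_layer, SoftmaxActivation

    def network():
        img = data_layer(name='image', size=784)
        fc_layer(input=img, size=10, act=SoftmaxActivation())

    reset_parser()
    return parse_network_config(network)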
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data Sources are helpers to define paddle training data or testing data.
"""
from paddle.trainer.config_parser import *
from .utils import deprecated
try:
import cPickle as pickle
except ImportError:
import six.moves.cPickle as pickle
__all__ = ['define_py_data_sources2']
def define_py_data_source(file_list,
cls,
module,
obj,
args=None,
async=False,
data_cls=PyData):
"""
Define a python data source.
For example, the simplest usage in trainer_config.py is as follows:
.. code-block:: python
define_py_data_source("train.list", TrainData, "data_provider", "process")
Or, if you want to pass arguments from trainer_config to data_provider.py, then
.. code-block:: python
define_py_data_source("train.list", TrainData, "data_provider", "process",
args={"dictionary": dict_name})
:param data_cls:
:param file_list: file list name, which contains all data file paths
:type file_list: basestring
:param cls: Train or Test Class.
:type cls: TrainData or TestData
:param module: python module name.
:type module: basestring
:param obj: python object name. May be a function name if using
PyDataProviderWrapper.
:type obj: basestring
:param args: The best practice is using dict to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to
receive arguments.
:type args: string or picklable object
:param async: Load Data asynchronously or not.
:type async: bool
:return: None
:rtype: None
"""
if isinstance(file_list, list):
file_list_name = 'train.list'
if cls == TestData:
file_list_name = 'test.list'
with open(file_list_name, 'w') as f:
f.writelines(file_list)
file_list = file_list_name
if not isinstance(args, basestring) and args is not None:
args = pickle.dumps(args, 0)
cls(
data_cls(
files=file_list,
load_data_module=module,
load_data_object=obj,
load_data_args=args,
async_load_data=async))
def define_py_data_sources(train_list,
test_list,
module,
obj,
args=None,
train_async=False,
data_cls=PyData):
"""
The annotation is almost the same as define_py_data_sources2, except that
it can specify train_async and data_cls.
:param data_cls:
:param train_list: Train list name.
:type train_list: basestring
:param test_list: Test list name.
:type test_list: basestring
:param module: python module name. If train and test are different, then
pass a tuple or list to this argument.
:type module: basestring or tuple or list
:param obj: python object name. May be a function name if using
PyDataProviderWrapper. If train and test are different, then pass
a tuple or list to this argument.
:type obj: basestring or tuple or list
:param args: The best practice is using dict() to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to receive
arguments. If train and test are different, then pass a tuple
or list to this argument.
:type args: string or picklable object or list or tuple.
:param train_async: Whether the training data is loaded asynchronously or not.
:type train_async: bool
:return: None
:rtype: None
"""
def __is_splitable__(o):
return (isinstance(o, list) or
isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2
assert train_list is not None or test_list is not None
assert module is not None and obj is not None
test_module = module
train_module = module
if __is_splitable__(module):
train_module, test_module = module
test_obj = obj
train_obj = obj
if __is_splitable__(obj):
train_obj, test_obj = obj
if args is None:
args = ""
train_args = args
test_args = args
if __is_splitable__(args):
train_args, test_args = args
if train_list is not None:
define_py_data_source(train_list, TrainData, train_module, train_obj,
train_args, train_async, data_cls)
if test_list is not None:
define_py_data_source(test_list, TestData, test_module, test_obj,
test_args, False, data_cls)
def define_py_data_sources2(train_list, test_list, module, obj, args=None):
"""
Define python Train/Test data sources in one method. If train/test use
the same Data Provider configuration, module/obj/args each contain one
argument; otherwise they contain a list or tuple of arguments. For example\:
.. code-block:: python
define_py_data_sources2(train_list="train.list",
test_list="test.list",
module="data_provider"
# if train/test use different configurations,
# obj=["process_train", "process_test"]
obj="process",
args={"dictionary": dict_name})
The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` .
:param train_list: Train list name.
:type train_list: basestring
:param test_list: Test list name.
:type test_list: basestring
:param module: python module name. If train and test are different, then
pass a tuple or list to this argument.
:type module: basestring or tuple or list
:param obj: python object name. May be a function name if using
PyDataProviderWrapper. If train and test are different, then pass
a tuple or list to this argument.
:type obj: basestring or tuple or list
:param args: The best practice is using dict() to pass arguments into
DataProvider, and use :code:`@init_hook_wrapper` to receive
arguments. If train and test are different, then pass a tuple
or list to this argument.
:type args: string or picklable object or list or tuple.
:return: None
:rtype: None
"""
def py_data2(files, load_data_module, load_data_object, load_data_args,
**kwargs):
data = create_data_config_proto()
data.type = 'py2'
data.files = files
data.load_data_module = load_data_module
data.load_data_object = load_data_object
data.load_data_args = load_data_args
data.async_load_data = False
return data
define_py_data_sources(
train_list=train_list,
test_list=test_list,
module=module,
obj=obj,
args=args,
data_cls=py_data2)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import inspect
from .attrs import ParamAttr
from .activations import TanhActivation
from paddle.trainer.config_parser import *
__all__ = [
'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default',
'wrap_act_default', 'wrap_param_default'
]
def __default_not_set_callback__(kwargs, name):
return name not in kwargs or kwargs[name] is None
def wrap_param_default(param_names=None,
default_factory=None,
not_set_callback=__default_not_set_callback__):
assert param_names is not None
assert isinstance(param_names, list) or isinstance(param_names, tuple)
for each_param_name in param_names:
assert isinstance(each_param_name, basestring)
def __impl__(func):
@functools.wraps(func)
def __wrapper__(*args, **kwargs):
if len(args) != 0:
argspec = inspect.getargspec(func)
num_positional = len(argspec.args)
if argspec.defaults:
num_positional -= len(argspec.defaults)
if not argspec.varargs and len(args) > num_positional:
logger.fatal(
"Must use keyword arguments for non-positional args")
for name in param_names:
if not_set_callback(kwargs, name): # Not set
kwargs[name] = default_factory(func)
return func(*args, **kwargs)
if hasattr(func, 'argspec'):
__wrapper__.argspec = func.argspec
else:
__wrapper__.argspec = inspect.getargspec(func)
return __wrapper__
return __impl__
class DefaultNameFactory(object):
def __init__(self, name_prefix):
self.__counter__ = 0
self.__name_prefix__ = name_prefix
def __call__(self, func):
if self.__name_prefix__ is None:
self.__name_prefix__ = func.__name__
tmp = "__%s_%d__" % (self.__name_prefix__, self.__counter__)
self.__check_name__(tmp)
self.__counter__ += 1
return tmp
def __check_name__(self, nm):
"""
@TODO(yuyang18): Implement it!
@param nm:
@return:
"""
pass
def reset(self):
self.__counter__ = 0
_name_factories = []
def reset_hook():
for factory in _name_factories:
factory.reset()
register_parse_config_hook(reset_hook)
def wrap_name_default(name_prefix=None, name_param="name"):
"""
Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".
.. code:: python
@wrap_name_default("some_name")
def func(name=None):
print name # name will never be None. If name is not set,
# name will be "some_name_%d"
:param name_prefix: name prefix. wrapped function's __name__ if None.
:type name_prefix: basestring
:return: a decorator to set default name
:rtype: callable
"""
factory = DefaultNameFactory(name_prefix)
_name_factories.append(factory)
return wrap_param_default([name_param], factory)
def wrap_param_attr_default(param_names=None, default_factory=None):
"""
Setting Default Parameter Attributes Decorator.
:param default_factory:
:param param_names: Parameter Attribute's Names, list of string
:type param_names: list
:return: decorator
"""
if param_names is None:
param_names = ['param_attr']
if default_factory is None:
default_factory = lambda _: ParamAttr()
return wrap_param_default(param_names, default_factory)
def wrap_bias_attr_default(param_names=None,
default_factory=None,
has_bias=True):
if param_names is None:
param_names = ['bias_attr']
if default_factory is None:
default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.)
def __bias_attr_not_set__(kwargs, name):
if has_bias:
return name not in kwargs or kwargs[name] is None or \
kwargs[name] == True
else:
return name in kwargs and kwargs[name] == True
return wrap_param_default(param_names, default_factory,
__bias_attr_not_set__)
def wrap_act_default(param_names=None, act=None):
if param_names is None:
param_names = ["act"]
if act is None:
act = TanhActivation()
return wrap_param_default(param_names, lambda _: act)
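def _default_decorator_example():
    # A hedged sketch of how the wrappers above compose: each call gets an
    # auto-generated name, a default ParamAttr and a default TanhActivation
    # unless the caller overrides them. my_op is illustrative only.
    @wrap_name_default("my_op")
    @wrap_param_attr_default()
    @wrap_act_default()
    def my_op(input, size, name=None, param_attr=None, act=None):
        return name, type(param_attr).__name__, type(act).__name__

    # On the first call this returns
    # ('__my_op_0__', 'ParameterAttribute', 'TanhActivation').
    return my_op(input=None, size=8)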
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .layers import LayerOutput, mixed_layer, identity_projection, \
slope_intercept_layer, scaling_layer, repeat_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
from paddle.trainer.config_parser import logger
__all__ = []
def register_unary_math_op(op_name, act):
def op(input, name=None):
return mixed_layer(
input=[identity_projection(input=input)], name=name, act=act)
op = wrap_name_default(op_name)(op)
op.__doc__ = type(act).__doc__
globals()[op_name] = op
__all__.append(op_name)
register_unary_math_op('exp', act.ExpActivation())
register_unary_math_op('log', act.LogActivation())
register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation())
register_unary_math_op('relu', act.ReluActivation())
register_unary_math_op('sqrt', act.SqrtActivation())
register_unary_math_op('reciprocal', act.ReciprocalActivation())
def add(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be added with"
" another LayerOutput or a number")
if layeroutput.size == other.size:
return mixed_layer(input=[
identity_projection(input=layeroutput),
identity_projection(input=other)
])
if other.size != 1 and layeroutput.size != 1:
logger.fatal("Two LayerOutput can be added only if they have equal size"
" or one of their sizes is 1. sizes are %s and %s" %
(layeroutput.size, other.size))
elif layeroutput.size == 1:
tmp = layeroutput
layeroutput = other
other = tmp
other = repeat_layer(other, layeroutput.size)
return mixed_layer(input=[
identity_projection(input=layeroutput), identity_projection(input=other)
])
LayerOutput.__radd__ = add
LayerOutput.__add__ = add
def sub(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, intercept=-other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be subtracted with"
" another LayerOutput or a number")
neg = slope_intercept_layer(input=other, slope=-1.0)
return add(layeroutput, neg)
LayerOutput.__sub__ = sub
def rsub(layeroutput, other):
neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
return add(neg, other)
LayerOutput.__rsub__ = rsub
def mul(layeroutput, other):
if is_compatible_with(other, float):
return slope_intercept_layer(input=layeroutput, slope=other)
if not isinstance(other, LayerOutput):
logger.fatal("LayerOutput can only be multiplied with"
" another LayerOutput or a number")
elif layeroutput.size == 1:
return scaling_layer(input=other, weight=layeroutput)
elif other.size == 1:
return scaling_layer(input=layeroutput, weight=other)
else:
logger.fatal("At least one of the operand of '*' must be a number"
" or a LayerOutput with size=1")
LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul
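def _layer_math_example():
    # A hedged sketch of the operator overloads above, as used inside a network
    # config (the data layer names and sizes are illustrative only).
    from paddle.trainer_config_helpers import data_layer

    a = data_layer(name='a', size=128)
    b = data_layer(name='b', size=128)
    scaled = 0.5 * a + 1.0   # two slope_intercept_layers: slope=0.5, then intercept=1.0
    summed = a + b           # mixed_layer over two identity projections
    return scaled, summed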
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import Settings, default_decay_rate, \
default_gradient_clipping_threshold, default_momentum
from .default_decorators import wrap_param_default
__all__ = [
'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
'L2Regularization', 'settings', 'ModelAverage'
]
class Optimizer(object):
def to_setting_kwargs(self):
raise NotImplementedError()
def extra_settings(self):
pass
@property
def is_support_sparse(self):
return True
class BaseSGDOptimizer(Optimizer):
"""
SGD Optimizer.
    SGD is an optimization method that tries to find the neural network
    parameters which minimize its "cost/error" by iteration. In Paddle's
    implementation, the SGD optimizer is synchronized: all gradients are
    computed and reduced into one gradient, and only then is the optimization
    step applied.
    The neural network formulates learning as the problem of minimizing an
    objective function that has the form of a sum
    .. math::
        Q(w) = \\sum_{i}^{n} Q_i(w)
    The value of the function Q is usually the cost of the neural network
    (for example, the mean squared error between prediction and label). The
    function Q is parametrised by w, the weights/biases of the neural
    network, which are what is to be learned; i indexes the i-th observation
    in the (training) data.
    So, the SGD method optimizes the weights by
    .. math::
        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
"""
def to_setting_kwargs(self):
raise NotImplementedError()
class MomentumOptimizer(BaseSGDOptimizer):
"""
MomentumOptimizer.
    When sparse=True, the update scheme is:
.. math::
\\alpha_t &= \\alpha_{t-1} / k \\\\
\\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
\\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
where :math:`k` is momentum, :math:`\\lambda` is decay rate,
:math:`\\gamma_t` is learning rate at the t'th step.
    :param momentum: the momentum factor.
    :type momentum: float
    :param sparse: whether to use the sparse momentum update scheme.
    :type sparse: bool
"""
def extra_settings(self):
default_momentum(self.momentum)
def to_setting_kwargs(self):
if self.sparse:
return {'learning_method': 'sparse_momentum'}
else:
return {'learning_method': 'momentum'}
def __init__(self, momentum=None, sparse=False):
self.momentum = momentum
self.sparse = sparse
class AdamOptimizer(BaseSGDOptimizer):
"""
Adam optimizer.
    For details, please refer to `Adam: A Method for Stochastic Optimization
<https://arxiv.org/abs/1412.6980>`_
.. math::
m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
:param beta1: the :math:`\\beta_1` in equation.
:type beta1: float
:param beta2: the :math:`\\beta_2` in equation.
:type beta2: float
    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
                    prevent division by zero.
:type epsilon: float
"""
@property
def is_support_sparse(self):
return False
def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8):
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
def to_setting_kwargs(self):
return {
'learning_method': 'adam',
'adam_beta1': self.beta1,
'adam_beta2': self.beta2,
'adam_epsilon': self.epsilon
}
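# A minimal sketch of how an optimizer is flattened into the keyword
# arguments consumed by the config parser's Settings() (values shown are the
# constructor defaults above):
#
#     opt = AdamOptimizer(beta1=0.9, beta2=0.999, epsilon=1e-8)
#     opt.to_setting_kwargs()
#     # -> {'learning_method': 'adam', 'adam_beta1': 0.9,
#     #     'adam_beta2': 0.999, 'adam_epsilon': 1e-08}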
class AdamaxOptimizer(BaseSGDOptimizer):
"""
Adamax optimizer.
    For details, please refer to `Adam: A Method for Stochastic Optimization
<https://arxiv.org/abs/1412.6980>`_
.. math::
m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
:param beta1: the :math:`\\beta_1` in the equation.
:type beta1: float
:param beta2: the :math:`\\beta_2` in the equation.
:type beta2: float
"""
def __init__(self, beta1, beta2):
self.beta1 = beta1
self.beta2 = beta2
def to_setting_kwargs(self):
return {
'learning_method': 'adamax',
'adam_beta1': self.beta1,
'adam_beta2': self.beta2
}
@property
def is_support_sparse(self):
return False
class AdaGradOptimizer(BaseSGDOptimizer):
"""
    AdaGrad (ADAptive GRAdient algorithm) optimizer.
    For details, please refer to `Adaptive Subgradient Methods for
Online Learning and Stochastic Optimization
<http://www.magicbroom.info/Papers/DuchiHaSi10.pdf>`_.
.. math::
G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
"""
def to_setting_kwargs(self):
return {'learning_method': 'adagrad'}
def __init__(self):
pass
class RMSPropOptimizer(BaseSGDOptimizer):
"""
    RMSProp (Root Mean Square Propagation) optimizer. For details, please
    refer to this `slide <http://www.cs.toronto.edu/~tijmen/csc321/slides/
    lecture_slides_lec6.pdf>`_.
    The equations of this method are as follows:
.. math::
v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
:param rho: the :math:`\\rho` in the equation. The forgetting factor.
:type rho: float
:param epsilon: the :math:`\\epsilon` in the equation.
:type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'rmsprop',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class DecayedAdaGradOptimizer(BaseSGDOptimizer):
"""
    AdaGrad method with a decayed sum of gradients. The equations of this
    method are as follows.
    .. math::
        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
        learning\\_rate &= 1/sqrt( E(g_t^2) + \\epsilon )
:param rho: The :math:`\\rho` parameter in that equation
:type rho: float
:param epsilon: The :math:`\\epsilon` parameter in that equation.
:type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'decayed_adagrad',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class AdaDeltaOptimizer(BaseSGDOptimizer):
"""
    AdaDelta method. For details of AdaDelta, please refer to
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
<http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.
.. math::
E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
E(g_t^2) + \\epsilon ) ) \\\\
E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
    :param rho: :math:`\\rho` in the equation.
    :type rho: float
    :param epsilon: :math:`\\epsilon` in the equation.
    :type epsilon: float
"""
def to_setting_kwargs(self):
return {
'learning_method': 'adadelta',
'ada_rou': self.rho,
'ada_epsilon': self.epsilon
}
def __init__(self, rho=0.95, epsilon=1e-6):
self.rho = rho
self.epsilon = epsilon
class BaseRegularization(Optimizer):
def __init__(self):
self.algorithm = ""
self.learning_method = ""
def to_setting_kwargs(self):
return {}
class L2Regularization(BaseRegularization):
def __init__(self, rate):
super(L2Regularization, self).__init__()
self.decay_rate = rate
def to_setting_kwargs(self):
if self.algorithm == 'owlqn':
return {'l2weight': self.decay_rate}
else:
return dict()
def extra_settings(self):
if self.algorithm == 'sgd' or self.algorithm == 'async_sgd':
default_decay_rate(self.decay_rate)
class ModelAverage(Optimizer):
def to_setting_kwargs(self):
return {
'average_window': self.average_window,
'max_average_window': self.max_average_window,
'do_average_in_cpu': self.do_average_in_cpu
}
def __init__(self,
average_window,
max_average_window=None,
do_average_in_cpu=False):
self.average_window = average_window
self.max_average_window = max_average_window
self.do_average_in_cpu = do_average_in_cpu
class GradientClippingThreshold(Optimizer):
def extra_settings(self):
default_gradient_clipping_threshold(self.threshold)
def __init__(self, threshold):
self.threshold = threshold
def to_setting_kwargs(self):
return dict()
def __extends__(dict1, dict2):
for key in dict2:
assert key not in dict1
dict1[key] = dict2[key]
return dict1
@wrap_param_default(
['learning_method'], default_factory=lambda _: MomentumOptimizer())
@wrap_param_default(
['regularization'], default_factory=lambda _: BaseRegularization())
def settings(batch_size,
learning_rate=1e-3,
learning_rate_decay_a=0.,
learning_rate_decay_b=0.,
learning_rate_schedule='poly',
learning_rate_args='',
async_lagged_grad_discard_ratio=1.5,
learning_method=None,
regularization=None,
is_async=False,
model_average=None,
gradient_clipping_threshold=None):
"""
Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD.
.. warning::
        Note that the 'batch_size' in PaddlePaddle is not equal to the global
        training batch size. It is the batch size used by a single training
        process. If you use N processes to train one model, for example three
        GPU machines, the global batch size is N * 'batch_size'.
:param batch_size: batch size for one training process.
:type batch_size: int
:param learning_rate: learning rate for SGD
:type learning_rate: float
    :param learning_method: The optimization algorithm extending gradient
                            descent, such as momentum, adagrad, rmsprop, etc.
                            Note that it should be an instance of a subclass
                            of BaseSGDOptimizer.
:type learning_method: BaseSGDOptimizer
:param regularization: The regularization method.
:type regularization: BaseRegularization
:param is_async: Is Async-SGD or not. Default value is False.
:type is_async: bool
:param model_average: Model Average Settings.
:type model_average: ModelAverage
    :param gradient_clipping_threshold: gradient clipping threshold. Gradient
                                        values larger than this threshold will
                                        be clipped.
    :type gradient_clipping_threshold: float
    :param async_lagged_grad_discard_ratio: async SGD gradient commit control;
          when more than async_lagged_grad_discard_ratio * num_gradient_servers
          commits have passed, the current async SGD gradient is discarded.
:type async_lagged_grad_discard_ratio: float
"""
if isinstance(regularization, BaseRegularization):
regularization = [regularization]
assert isinstance(learning_method, Optimizer)
if isinstance(learning_method, BaseSGDOptimizer):
algorithm = 'async_sgd' if is_async else 'sgd'
else:
algorithm = 'owlqn'
args = [
'batch_size', 'learning_rate', 'learning_rate_decay_a',
'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
'gradient_clipping_threshold', 'async_lagged_grad_discard_ratio'
]
kwargs = dict()
kwargs['algorithm'] = algorithm
for arg in args:
kwargs[arg] = locals()[arg]
kwargs = __extends__(kwargs, learning_method.to_setting_kwargs())
learning_method.extra_settings()
for regular in regularization:
assert isinstance(regular, BaseRegularization)
regular.algorithm = algorithm
regular.learning_method = kwargs['learning_method']
kwargs = __extends__(kwargs, regular.to_setting_kwargs())
regular.extra_settings()
if gradient_clipping_threshold is not None:
gradient_clipping_threshold = GradientClippingThreshold(
threshold=gradient_clipping_threshold)
for each in [model_average, gradient_clipping_threshold]:
if each is not None:
assert isinstance(each, Optimizer)
each.algorithm = algorithm
each.learning_method = kwargs['learning_method']
kwargs = __extends__(kwargs, each.to_setting_kwargs())
each.extra_settings()
# Do Check?
Settings(**kwargs)
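# A minimal sketch of a trainer config calling settings(). If learning_method
# or regularization is omitted, the decorators above default them to
# MomentumOptimizer() and BaseRegularization(); the values below are
# illustrative.
#
#     from paddle.trainer_config_helpers import *
#
#     settings(
#         batch_size=128,
#         learning_rate=1e-3,
#         learning_method=AdamOptimizer(beta1=0.9, beta2=0.999, epsilon=1e-8),
#         regularization=L2Regularization(rate=8e-4),
#         model_average=ModelAverage(average_window=0.5),
#         gradient_clipping_threshold=25)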
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
"""
__all__ = [
"BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
"CudnnMaxPooling", "CudnnAvgPooling", "CudnnAvgInclPadPooling",
"SumPooling", "SquareRootNPooling"
]
class BasePoolingType(object):
"""
Base Pooling Type.
Note these pooling types are used for sequence input, not for images.
Each PoolingType contains one parameter:
:param name: pooling layer type name used by paddle.
:type name: basestring
"""
def __init__(self, name):
self.name = name
class MaxPooling(BasePoolingType):
"""
Max pooling.
    Return the maximum value of each dimension over the sequence or time steps.
.. math::
max(samples\\_of\\_a\\_sequence)
    :param output_max_index: True to output the index of the max value within
                             the sequence instead of the max value itself.
                             None means use the default value in the proto.
:type output_max_index: bool|None
"""
def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index
class MaxWithMaskPooling(BasePoolingType):
"""
MaxWithMask pooling.
    Return not only the maximum value of each dimension over the sequence or
    time steps, but also the location indices of the maximum values found.
"""
def __init__(self):
BasePoolingType.__init__(self, "max-pool-with-mask")
class CudnnMaxPooling(BasePoolingType):
"""
    Cudnn max pooling, which only supports GPU. Return the maximum value in
    the pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
    Cudnn average pooling, which only supports GPU. Return the average value
    in the pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class CudnnAvgInclPadPooling(BasePoolingType):
"""
    Cudnn average pooling, which only supports GPU. Return the average value
    in the pooling window, taking the padding cells into account.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-incl-pad-pool")
class AvgPooling(BasePoolingType):
"""
Average pooling.
    Return the average value of each dimension over the sequence or time steps.
.. math::
sum(samples\\_of\\_a\\_sequence)/sample\\_num
"""
STRATEGY_AVG = "average"
STRATEGY_SUM = "sum"
STRATEGY_SQROOTN = "squarerootn"
def __init__(self, strategy=STRATEGY_AVG):
BasePoolingType.__init__(self, "average")
self.strategy = strategy
class SumPooling(AvgPooling):
"""
Sum pooling.
    Return the sum of each dimension over the sequence or time steps.
.. math::
sum(samples\\_of\\_a\\_sequence)
"""
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
class SquareRootNPooling(AvgPooling):
"""
Square Root Pooling.
    Return the sum of each dimension over the sequence, divided by the square
    root of the sequence length.
.. math::
sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
"""
def __init__(self):
AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
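# A minimal sketch of how these pooling types are used: they are passed to the
# sequence pooling layer in trainer_config_helpers (see the
# test_sequence_pooling config listed below). Layer names are illustrative.
#
#     from paddle.trainer_config_helpers import *
#
#     settings(batch_size=1000, learning_rate=1e-4)
#
#     seq = data_layer(name='seq_in', size=128)
#     max_pooled = pooling_layer(input=seq, pooling_type=MaxPooling())
#     avg_pooled = pooling_layer(input=seq, pooling_type=AvgPooling())
#     outputs(max_pooled, avg_pooled)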
#################### test_config_parser #########################
add_test(NAME layers_test
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_test(NAME test_reset_hook
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
add_test(NAME test_layerHelpers
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_BINARY_DIR}/python/
${PADDLE_BINARY_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/message_differencer.h>
#include <fstream>
#include <iostream>
#include "TrainerConfig.pb.h"
bool loadPb(google::protobuf::Message* conf, const std::string& filename) {
std::ifstream fin;
fin.open(filename.c_str());
if (fin.is_open()) {
std::string str((std::istreambuf_iterator<char>(fin)),
std::istreambuf_iterator<char>());
bool ok = google::protobuf::TextFormat::ParseFromString(str, conf);
fin.close();
return ok;
} else {
return false;
}
}
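// Compares two text-format protobuf config files for approximate equality.
// With two file arguments they are parsed as paddle::ModelConfig; with an
// extra third argument they are parsed as paddle::TrainerConfig.
// Exit codes: 0 = equal, 1 = bad argument count, 2/3 = parse failure of the
// first/second file, 4 = configs differ.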
int main(int argc, char** argv) {
std::unique_ptr<google::protobuf::Message> config1;
std::unique_ptr<google::protobuf::Message> config2;
if (argc == 3) {
config1.reset(new paddle::ModelConfig());
config2.reset(new paddle::ModelConfig());
} else if (argc == 4) {
config1.reset(new paddle::TrainerConfig());
config2.reset(new paddle::TrainerConfig());
}
if (!config1 || !config2) {
return 1;
} else if (!loadPb(config1.get(), argv[1])) {
return 2;
} else if (!loadPb(config2.get(), argv[2])) {
return 3;
} else {
if (google::protobuf::util::MessageDifferencer::ApproximatelyEquals(
*config1, *config2)) {
return 0;
} else {
return 4;
}
}
}
#!/bin/bash
export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer
test_factorization_machine)
export whole_configs=(test_split_datasource)
#!/bin/bash
set -e
cd `dirname $0`
protostr=$PWD/protostr
. file_list.sh
for conf in ${configs[*]}
do
echo "Generating " $conf
$1 -m paddle.utils.dump_config $conf.py > $protostr/$conf.protostr.unittest
if [ ! -f "$protostr/$conf.protostr" ]; then
cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
fi
cat ${conf}.py |$1 test_config_parser_for_non_file_config.py > $protostr/$conf.protostr.non_file_config.unittest
done
for conf in ${whole_configs[*]}
do
echo "Generating " $conf
$1 -m paddle.utils.dump_config $conf.py "" --whole > $protostr/$conf.protostr.unittest
if [ ! -f "$protostr/$conf.protostr" ]; then
cp $protostr/$conf.protostr.unittest $protostr/$conf.protostr
fi
cat ${conf}.py |$1 test_config_parser_for_non_file_config.py --whole > $protostr/$conf.protostr.non_file_config.unittest
done
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-3, batch_size=1000)
img = data_layer(name='image', size=256 * 256)
# parse_conv in config_parser.py is not strictly accurate when filter_size
# is not square, so a square filter_size is set here.
img_conv = img_conv_layer(
input=img,
num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
dilation=(1, 1),
stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-3, batch_size=1000)
img = data_layer(name='image', size=227 * 227)
# parse_conv in config_parser.py is not strictly accurate when filter_size
# is not square, so a square filter_size is set here.
img_conv = img_conv_layer(
input=img,
num_channels=1,
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
stride=(1, 1),
act=LinearActivation(),
trans=True)
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
din = data_layer(name='data', size=30)
seq_op = [first_seq, last_seq]
agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE]
opts = []
for op in seq_op:
for al in agg_level:
opts.append(op(input=din, agg_level=al))
for op in seq_op:
opts.append(
op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5))
outputs(opts)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Test all activations.
'''
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-4, batch_size=1000)
din = data_layer(name='input', size=100)
acts = [
TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
LinearActivation, ExpActivation, ReluActivation, BReluActivation,
SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
]
outputs([
fc_layer(
input=din, size=100, act=act(), name="layer_%d" % i)
for i, act in enumerate(acts)
])
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
x = data_layer(name='data', size=100)
x = layer_math.exp(x)
x = layer_math.sqrt(x)
x = layer_math.reciprocal(x)
x = layer_math.log(x)
x = layer_math.abs(x)
x = layer_math.sigmoid(x)
x = layer_math.tanh(x)
x = layer_math.square(x)
x = layer_math.relu(x)
y = 1 + x
y = y + 1
y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z = data_layer(name='data_2', size=1)
y = y * z
y = z * y
y = y + z
y = z + y
outputs(y)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='test', size=100)
din = embedding_layer(input=din, size=256)
with mixed_layer(size=100) as m1:
m1 += full_matrix_projection(input=din)
with mixed_layer(size=100) as m2:
m2 += table_projection(input=m1)
with mixed_layer(size=100) as m3:
m3 += identity_projection(input=m2)
with mixed_layer(size=100) as m4:
m4 += dotmul_projection(input=m3)
with mixed_layer() as m5:
m5 += context_projection(input=m4, context_len=3)
with mixed_layer() as m6:
m6 += dotmul_operator(a=m3, b=m4)
m6 += scaling_projection(m3)
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7:
m7 += conv_operator(
img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
m7 += conv_projection(img, filter_size=3, num_filters=64, num_channels=1)
with mixed_layer() as m8:
m8 += conv_operator(
img=img,
filter=flt,
num_filters=64,
num_channels=1,
filter_size=3,
stride=2,
padding=1,
trans=True)
m8 += conv_projection(
img,
filter_size=3,
num_filters=64,
num_channels=1,
stride=2,
padding=1,
trans=True)
end = mixed_layer(
input=[
full_matrix_projection(input=m5),
trans_full_matrix_projection(input=m6),
full_matrix_projection(input=m7), full_matrix_projection(input=m8)
],
size=100,
layer_attr=ExtraAttr(
drop_rate=0.5, error_clipping_threshold=40))
outputs(end)
type: "nn"
layers {
name: "image"
type: "data"
size: 65536
active_type: ""
}
layers {
name: "__conv_0__"
type: "exconv"
size: 3297856
active_type: ""
inputs {
input_layer_name: "image"
input_parameter_name: "___conv_0__.w0"
conv_conf {
filter_size: 32
channels: 1
stride: 1
padding: 1
groups: 1
filter_channels: 1
output_x: 227
img_size: 256
caffe_mode: true
filter_size_y: 32
padding_y: 1
stride_y: 1
output_y: 227
img_size_y: 256
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
num_filters: 64
shared_biases: true
height: 227
width: 227
}
layers {
name: "__batch_norm_0__"
type: "batch_norm"
size: 3297856
active_type: "relu"
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w0"
image_conf {
channels: 64
img_size: 227
img_size_y: 227
}
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w1"
}
inputs {
input_layer_name: "__conv_0__"
input_parameter_name: "___batch_norm_0__.w2"
}
bias_parameter_name: "___batch_norm_0__.wbias"
moving_average_fraction: 0.9
height: 227
width: 227
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
type: "norm"
size: 3297856
active_type: ""
inputs {
input_layer_name: "__batch_norm_0__"
norm_conf {
norm_type: "cmrnorm-projection"
channels: 64
size: 32
scale: 0.0004
pow: 0.75
output_x: 227
img_size: 227
blocked: false
output_y: 227
img_size_y: 227
}
}
height: 227
width: 227
}
layers {
name: "__pool_0__"
type: "pool"
size: 2458624
active_type: ""
inputs {
input_layer_name: "__conv_0__"
pool_conf {
pool_type: "max-projection"
channels: 64
size_x: 32
stride: 1
output_x: 196
img_size: 227
padding: 0
size_y: 32
stride_y: 1
output_y: 196
img_size_y: 227
padding_y: 0
}
}
height: 196
width: 196
}
parameters {
name: "___conv_0__.w0"
size: 65536
initial_mean: 0.0
initial_std: 0.0441941738242
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___conv_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 64
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w0"
size: 64
initial_mean: 1.0
initial_std: 0.0
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___batch_norm_0__.w1"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.w2"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
is_static: true
is_shared: true
}
parameters {
name: "___batch_norm_0__.wbias"
size: 64
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 64
initial_strategy: 0
initial_smart: false
}
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
sub_models {
name: "root"
layer_names: "image"
layer_names: "__conv_0__"
layer_names: "__batch_norm_0__"
layer_names: "__crmnorm_0__"
layer_names: "__pool_0__"
input_layer_names: "image"
output_layer_names: "__pool_0__"
output_layer_names: "__crmnorm_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 30
active_type: ""
}
layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "seq"
seq_pool_stride: -1
}
layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "non-seq"
seq_pool_stride: -1
}
layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
select_first: true
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 30
active_type: ""
inputs {
input_layer_name: "data"
}
trans_type: "non-seq"
seq_pool_stride: 5
}
input_layer_names: "data"
output_layer_names: "__first_seq_0__"
output_layer_names: "__first_seq_1__"
output_layer_names: "__last_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__first_seq_2__"
output_layer_names: "__last_seq_2__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__first_seq_0__"
layer_names: "__first_seq_1__"
layer_names: "__last_seq_0__"
layer_names: "__last_seq_1__"
layer_names: "__first_seq_2__"
layer_names: "__last_seq_2__"
input_layer_names: "data"
output_layer_names: "__first_seq_0__"
output_layer_names: "__first_seq_1__"
output_layer_names: "__last_seq_0__"
output_layer_names: "__last_seq_1__"
output_layer_names: "__first_seq_2__"
output_layer_names: "__last_seq_2__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__clip_0__"
type: "clip"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
clip_conf {
min: -10
max: 10
}
}
}
input_layer_names: "input"
output_layer_names: "__clip_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__clip_0__"
input_layer_names: "input"
output_layer_names: "__clip_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "vector1"
type: "data"
size: 10
active_type: ""
}
layers {
name: "vector2"
type: "data"
size: 10
active_type: ""
}
layers {
name: "__dot_prod_layer_0__"
type: "dot_prod"
size: 1
active_type: ""
inputs {
input_layer_name: "vector1"
}
inputs {
input_layer_name: "vector2"
}
}
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
sub_models {
name: "root"
layer_names: "vector1"
layer_names: "vector2"
layer_names: "__dot_prod_layer_0__"
input_layer_names: "vector1"
input_layer_names: "vector2"
output_layer_names: "__dot_prod_layer_0__"
is_recurrent_layer_group: false
}
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=120)
outputs(bidirectional_gru(input=din, size=40, return_seq=True))