Commit 58e1b3b3 authored by Yu Yang, committed by GitHub

Merge pull request #446 from QiJune/format_py_code_2nd

format python code in python directory
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
@@ -18,8 +18,7 @@ import collections
 import functools
 import itertools

-logging.basicConfig(
-    format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
-    " %(message)s")
+logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
+                    " %(message)s")
@@ -132,8 +131,10 @@ class InputOrderWrapper(object):
     def __call__(self, obj, filename):
         for item in self.generator(obj, filename):
             if isinstance(item, dict):
-                yield [item.get(input_name, None) for input_name in
-                       self.input_order]
+                yield [
+                    item.get(input_name, None)
+                    for input_name in self.input_order
+                ]
             else:
                 yield item
@@ -162,8 +163,8 @@ class CheckWrapper(object):
                 yield items
             except AssertionError as e:
                 self.logger.warning(
-                    "Item (%s) is not fit the input type with error %s"
-                    % (repr(item), repr(e)))
+                    "Item (%s) is not fit the input type with error %s" %
+                    (repr(item), repr(e)))
                 if self.check_fail_continue:
                     continue
@@ -202,13 +203,17 @@ class CheckWrapper(object):
             callback(each)


-def provider(input_types=None, should_shuffle=None, pool_size=-1,
+def provider(input_types=None,
+             should_shuffle=None,
+             pool_size=-1,
              min_pool_size=-1,
              can_over_batch_size=True,
              calc_batch_size=None,
              cache=CacheType.NO_CACHE,
-             check=False, check_fail_continue=False,
-             init_hook=None, **kwargs):
+             check=False,
+             check_fail_continue=False,
+             init_hook=None,
+             **kwargs):
     """
     Provider decorator. Use it to make a function into PyDataProvider2 object.
     In this function, user only need to get each sample for some train/test
@@ -318,8 +323,8 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
                     "Could not recognize should_shuffle (%s), "
                     "just use default value of should_shuffle."
                     " Please set should_shuffle to bool value or "
-                    "something in %s" % (
-                        repr(self.should_shuffle),
-                        repr(true_table + false_table)))
+                    "something in %s" %
+                    (repr(self.should_shuffle),
+                     repr(true_table + false_table)))
                 self.should_shuffle = None
@@ -351,8 +356,7 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
             self.generator = InputOrderWrapper(self.generator,
                                                self.input_order)
             if self.check:
-                self.generator = CheckWrapper(self.generator,
-                                              self.slots,
+                self.generator = CheckWrapper(self.generator, self.slots,
                                               check_fail_continue,
                                               self.logger)
@@ -368,4 +372,3 @@ def deserialize_args(args):
     :return:
     """
     return cPickle.loads(args)
-
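For context, a minimal sketch of how the reformatted `provider` decorator above is typically used in a data provider script; the input types, file format, and parsing below are illustrative assumptions, not part of this commit:

from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value


@provider(
    input_types=[dense_vector(784), integer_value(10)],
    should_shuffle=True,
    check=True,
    check_fail_continue=True)  # CheckWrapper logs bad samples, then skips them
def process(settings, filename):
    # Assumed format: each line holds 784 comma-separated floats plus a label.
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split(',')
            yield [float(x) for x in fields[:-1]], int(fields[-1])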
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 This module provide a wrapper(decorator) to wrap a data process method into a
 PyDataProvider. Some examples are shown `here <data_provider/python_case.html>`_.
@@ -47,6 +46,7 @@ except ImportError:
     import io

+
 class SlotType(object):  # Just a hint for user.
     pass
@@ -83,6 +83,7 @@ class SparseNonValueSlot(SlotType):
     - **SubSeq**: [[[int, int, ...], [int, ....], ...] , \
                   [[int, int, ...], [int, ....], ...] , ...]
     """
+
     def __init__(self, dim):
         """
         :param dim: slot dimension
@@ -294,8 +295,9 @@ class GeneralPyDataProvider:
                 fn = "%s_%d" % (self.profile_filename, self.profile_count)
                 sortby = "cumulative"
                 with open(fn, "w") as f:
-                    pstats.Stats(self.profiler, stream=f).sort_stats(
-                        sortby).print_stats()
+                    pstats.Stats(
+                        self.profiler,
+                        stream=f).sort_stats(sortby).print_stats()
                 self.logger.info("saving profile to file %s" % fn)
                 self.profile_count += 1
             self.logger.info("resetting profile")
@@ -453,9 +455,10 @@ class GeneralPyDataProvider:
             seq_stream.flush()
             subseq_stream.flush()

-            return "".join([self.int_packer.pack(current_batch_size),
-                            data_bytes.getvalue(),
-                            seq_bytes.getvalue(), subseq_bytes.getvalue()])
+            return "".join([
+                self.int_packer.pack(current_batch_size), data_bytes.getvalue(),
+                seq_bytes.getvalue(), subseq_bytes.getvalue()
+            ])
         finally:
             data_stream.close()
@@ -516,7 +519,7 @@ class GeneralPyDataProvider:
                                     self.data_pool[idx])
                 idx -= 1

-            ret_list += self.data_pool[self.data_pool_idx: idx + 1]
+            ret_list += self.data_pool[self.data_pool_idx:idx + 1]

             # for speed reason, just shift left index, not delete data actually.
             self.data_pool_idx = idx + 1
@@ -537,8 +540,8 @@ class GeneralPyDataProvider:
             if self.max_pool_size == 0:
                 for i in xrange(min(self.file_count, len(self.generators))):
                     self.data_pool += list(self.generators[i])
-                self.generators = self.generators[
-                    min(self.file_count, len(self.generators)):]
+                self.generators = self.generators[min(self.file_count,
+                                                      len(self.generators)):]
                 self.max_pool_size = len(self.data_pool)
             else:
                 while len(self.data_pool) < self.max_pool_size and len(
@@ -562,9 +565,15 @@ def default_init_hook(cls, *args, **kwargs):
     del cls, args, kwargs


-def provider(slots=None, use_seq=False, should_shuffle=True, pool_size=1,
-             can_over_batch_size=True, calc_batch_size=lambda data: 1,
-             debug=False, init_hook=default_init_hook, profile_filename=None):
+def provider(slots=None,
+             use_seq=False,
+             should_shuffle=True,
+             pool_size=1,
+             can_over_batch_size=True,
+             calc_batch_size=lambda data: 1,
+             debug=False,
+             init_hook=default_init_hook,
+             profile_filename=None):
     """
     The decorator for PyDataProvider. User should use this to create Provider class.
     User should only concern how to read sample from file.
...
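A hypothetical sketch of the legacy decorator above; the slot classes named here (DenseSlot, IndexSlot) are assumed siblings of the SparseNonValueSlot shown in this diff, and the file format is invented for illustration:

from paddle.trainer.PyDataProvider import provider, DenseSlot, IndexSlot


@provider(slots=[DenseSlot(784), IndexSlot(10)], should_shuffle=True)
def process(obj, file_name):
    # Assumed format: 784 comma-separated floats followed by an integer label.
    with open(file_name, 'r') as f:
        for line in f:
            fields = line.split(',')
            yield [float(x) for x in fields[:-1]], int(fields[-1])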
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
@@ -17,8 +17,7 @@ from paddle.proto.DataConfig_pb2 import DataConfig
 g_config = None


-def SimpleData(
-        files=None,
+def SimpleData(files=None,
                feat_dim=None,
                context_len=None,
                buffer_capacity=None):
@@ -33,6 +32,7 @@ def SimpleData(files=None,
     data_config.buffer_capacity = buffer_capacity
     return data_config

+
 def get_config_funcs(trainer_config):
     global g_config
     g_config = trainer_config
...
@@ -12,13 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__all__ = ["TanhActivation", "SigmoidActivation",
-           "SoftmaxActivation", "IdentityActivation", "LinearActivation",
-           'SequenceSoftmaxActivation', 'ExpActivation',
-           "ReluActivation", "BReluActivation", "SoftReluActivation",
-           "STanhActivation",
-           "AbsActivation", "SquareActivation",
-           "BaseActivation"]
+__all__ = [
+    "TanhActivation", "SigmoidActivation", "SoftmaxActivation",
+    "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
+    'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
+    "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation"
+]


 class BaseActivation(object):
@@ -51,7 +50,8 @@ class TanhActivation(BaseActivation):
     f(z)=tanh(z)=\\frac{e^z-e^{-z}}{e^z+e^{-z}}
     """

-    def __init__(self): BaseActivation.__init__(self, 'tanh', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'tanh', True)


 class SigmoidActivation(BaseActivation):
@@ -63,7 +63,8 @@ class SigmoidActivation(BaseActivation):
     f(z) = \\frac{1}{1+exp(-z)}
     """

-    def __init__(self): BaseActivation.__init__(self, 'sigmoid', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'sigmoid', True)


 class SoftmaxActivation(BaseActivation):
@@ -104,7 +105,8 @@ class IdentityActivation(BaseActivation):
     Just do nothing for output both forward/backward.
     """

-    def __init__(self): BaseActivation.__init__(self, '', False)
+    def __init__(self):
+        BaseActivation.__init__(self, '', False)


 LinearActivation = IdentityActivation
@@ -124,7 +126,8 @@ class ReluActivation(BaseActivation):
     0     &\\quad\\mathrm{otherwize}
     """

-    def __init__(self): BaseActivation.__init__(self, 'relu', True)
+    def __init__(self):
+        BaseActivation.__init__(self, 'relu', True)


 class BReluActivation(BaseActivation):
@@ -141,7 +144,8 @@ class BReluActivation(BaseActivation):
     0     &\\quad \\mathrm{otherwise}
     """

-    def __init__(self): BaseActivation.__init__(self, 'brelu', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'brelu', False)


 class SoftReluActivation(BaseActivation):
@@ -149,7 +153,9 @@ class SoftReluActivation(BaseActivation):
     SoftRelu Activation.
     """

-    def __init__(self): BaseActivation.__init__(self, 'softrelu', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'softrelu', False)
+

 class STanhActivation(BaseActivation):
     """
@@ -160,7 +166,8 @@ class STanhActivation(BaseActivation):
     f(z) = 1.7159 * tanh(2/3*z)
     """

-    def __init__(self): BaseActivation.__init__(self, 'stanh', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'stanh', False)


 class AbsActivation(BaseActivation):
@@ -178,7 +185,8 @@ class AbsActivation(BaseActivation):
     0     &\\quad if \\quad z = 0
     """

-    def __init__(self): BaseActivation.__init__(self, 'abs', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'abs', False)


 class SquareActivation(BaseActivation):
@@ -189,7 +197,9 @@ class SquareActivation(BaseActivation):
     f(z) = z^2.
     """

-    def __init__(self): BaseActivation.__init__(self, 'square', False)
+    def __init__(self):
+        BaseActivation.__init__(self, 'square', False)
+

 class ExpActivation(BaseActivation):
     """
@@ -198,7 +208,10 @@ class ExpActivation(BaseActivation):
     .. math::
     f(z) = e^z.
     """
-    def __init__(self): BaseActivation.__init__(self, 'exponential', False)
+
+    def __init__(self):
+        BaseActivation.__init__(self, 'exponential', False)
+

 class LogActivation(BaseActivation):
     """
@@ -207,4 +220,6 @@ class LogActivation(BaseActivation):
     .. math::
     f(z) = log(z)
     """
-    def __init__(self): BaseActivation.__init__(self, 'log', False)
+
+    def __init__(self):
+        BaseActivation.__init__(self, 'log', False)
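Each class above only tags a layer with an activation name; instances are passed to layers through their `act` argument, exactly as the test configs later in this diff do. A minimal sketch:

from paddle.trainer_config_helpers import *

din = data_layer(name='input', size=100)
hidden = fc_layer(input=din, size=200, act=TanhActivation())
out = fc_layer(input=hidden, size=10, act=SoftmaxActivation())
outputs(out)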
@@ -13,8 +13,9 @@
 # limitations under the License.

 from paddle.trainer.config_parser import *

-__all__ = ['ParamAttr', 'ExtraAttr', 'ParameterAttribute',
-           'ExtraLayerAttribute']
+__all__ = [
+    'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute'
+]


 def convert_and_compare(x, Type):
@@ -25,7 +26,8 @@ def convert_and_compare(x, Type):
     :param Type: target type to check x over
     """
-    return type(x)(Type(x))==x
+    return type(x)(Type(x)) == x
+

 def is_compatible_with(x, Type):
     """
@@ -91,9 +93,17 @@ class ParameterAttribute(object):
     :type sparse_update: bool
     """

-    def __init__(self, name=None, is_static=False, initial_std=None,
-                 initial_mean=None, initial_max=None, initial_min=None,
-                 l1_rate=None, l2_rate=None, learning_rate=None, momentum=None,
+    def __init__(self,
+                 name=None,
+                 is_static=False,
+                 initial_std=None,
+                 initial_mean=None,
+                 initial_max=None,
+                 initial_min=None,
+                 l1_rate=None,
+                 l2_rate=None,
+                 learning_rate=None,
+                 momentum=None,
                  sparse_update=False):
         # initialize strategy.
         if is_static:
@@ -183,7 +193,10 @@ class ExtraLayerAttribute(object):
     :type device: int
     """

-    def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None):
+    def __init__(self,
+                 error_clipping_threshold=None,
+                 drop_rate=None,
+                 device=None):
         self.attr = dict()
         if isinstance(error_clipping_threshold, float):
             assert error_clipping_threshold > 0
@@ -200,8 +213,8 @@ class ExtraLayerAttribute(object):
         for key in self.attr:
             if not hasattr(self, 'can_%s' % key) or \
                     not getattr(self, 'can_%s' % key):
-                raise NotImplementedError(
-                    "Layer %s cannot support %s" % (layer_name, key))
+                raise NotImplementedError("Layer %s cannot support %s" %
+                                          (layer_name, key))

     @staticmethod
     def to_kwargs(attr):
...
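A short usage sketch for the two attribute classes touched above, using only constructor arguments visible in this diff; the surrounding layers are illustrative:

from paddle.trainer_config_helpers import *

# A named (shareable) parameter with explicit initialization and L2 decay.
w_attr = ParamAttr(name='w', initial_std=0.01, l2_rate=1e-4)
# Per-layer extras: dropout plus error clipping.
extra = ExtraAttr(drop_rate=0.5, error_clipping_threshold=40)

din = data_layer(name='input', size=100)
h = fc_layer(input=din, size=100, param_attr=w_attr, layer_attr=extra)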
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Data Sources are helpers to define paddle training data or testing data.
 """
@@ -26,8 +25,12 @@ except ImportError:

 __all__ = ['define_py_data_sources2']


-def define_py_data_source(file_list, cls, module,
-                          obj, args=None, async=False,
+def define_py_data_source(file_list,
+                          cls,
+                          module,
+                          obj,
+                          args=None,
+                          async=False,
                           data_cls=PyData):
     """
     Define a python data source.
@@ -76,6 +79,7 @@ def define_py_data_source(file_list,
         args = pickle.dumps(args, 0)

     if data_cls is None:
+
         def py_data2(files, load_data_module, load_data_object, load_data_args,
                      **kwargs):
             data = DataBase()
@@ -86,17 +90,25 @@ def define_py_data_source(file_list,
             data.load_data_args = load_data_args
             data.async_load_data = True
             return data
+
         data_cls = py_data2

-    cls(data_cls(files=file_list,
-                 load_data_module=module,
-                 load_data_object=obj,
-                 load_data_args=args,
-                 async_load_data=async))
+    cls(
+        data_cls(
+            files=file_list,
+            load_data_module=module,
+            load_data_object=obj,
+            load_data_args=args,
+            async_load_data=async))
-def define_py_data_sources(train_list, test_list, module, obj, args=None,
-                           train_async=False, data_cls=PyData):
+def define_py_data_sources(train_list,
+                           test_list,
+                           module,
+                           obj,
+                           args=None,
+                           train_async=False,
+                           data_cls=PyData):
     """
     The annotation is almost the same as define_py_data_sources2, except that
     it can specific train_async and data_cls.
@@ -125,8 +137,8 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None,
     """

     def __is_splitable__(o):
-        return (isinstance(o, list) or isinstance(o, tuple)
-                ) and hasattr(o, '__len__') and len(o) == 2
+        return (isinstance(o, list) or
+                isinstance(o, tuple)) and hasattr(o, '__len__') and len(o) == 2

     assert train_list is not None or test_list is not None
     assert module is not None and obj is not None
@@ -196,7 +208,8 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
     :return: None
     :rtype: None
     """
-    define_py_data_sources(train_list=train_list,
-                           test_list=test_list,
-                           module=module,
-                           obj=obj,
+    define_py_data_sources(
+        train_list=train_list,
+        test_list=test_list,
+        module=module,
+        obj=obj,
...
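For reference, a typical call to define_py_data_sources2 from a trainer config; the list files, module, and args are placeholders:

from paddle.trainer_config_helpers import *

define_py_data_sources2(
    train_list='train.list',
    test_list='test.list',
    module='my_data_provider',  # assumed module exposing an @provider function
    obj='process',
    args={'dictionary': 'dict.txt'})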
@@ -18,16 +18,18 @@ from .attrs import ParamAttr
 from .activations import TanhActivation
 from paddle.trainer.config_parser import *

-__all__ = ['wrap_name_default', 'wrap_param_attr_default',
-           'wrap_bias_attr_default', 'wrap_act_default',
-           'wrap_param_default']
+__all__ = [
+    'wrap_name_default', 'wrap_param_attr_default', 'wrap_bias_attr_default',
+    'wrap_act_default', 'wrap_param_default'
+]


 def __default_not_set_callback__(kwargs, name):
     return name not in kwargs or kwargs[name] is None


-def wrap_param_default(param_names=None, default_factory=None,
+def wrap_param_default(param_names=None,
+                       default_factory=None,
                        not_set_callback=__default_not_set_callback__):
     assert param_names is not None
     assert isinstance(param_names, list) or isinstance(param_names, tuple)
@@ -43,7 +45,8 @@ def wrap_param_default(param_names=None, default_factory=None,
             if argspec.defaults:
                 num_positional -= len(argspec.defaults)
             if not argspec.varargs and len(args) > num_positional:
-                logger.fatal("Must use keyword arguments for non-positional args")
+                logger.fatal(
+                    "Must use keyword arguments for non-positional args")
             for name in param_names:
                 if not_set_callback(kwargs, name):  # Not set
                     kwargs[name] = default_factory(func)
@@ -112,13 +115,13 @@ def wrap_param_attr_default(param_names=None, default_factory=None):
     return wrap_param_default(param_names, default_factory)


-def wrap_bias_attr_default(param_names=None, default_factory=None,
+def wrap_bias_attr_default(param_names=None,
+                           default_factory=None,
                            has_bias=True):
     if param_names is None:
         param_names = ['bias_attr']
     if default_factory is None:
-        default_factory = lambda _: ParamAttr(initial_std=0.,
-                                              initial_mean=0.)
+        default_factory = lambda _: ParamAttr(initial_std=0., initial_mean=0.)

     def __bias_attr_not_set__(kwargs, name):
         if has_bias:
...
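What these decorators buy callers, as a sketch; `my_layer` is hypothetical, and the default factory mirrors the one defined above:

@wrap_param_default(
    ['bias_attr'],
    default_factory=lambda _: ParamAttr(initial_std=0., initial_mean=0.))
def my_layer(input, bias_attr=None):
    # bias_attr is now always set: either caller-provided or the factory default.
    return input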
@@ -15,13 +15,14 @@
 from paddle.trainer.config_parser import *
 from default_decorators import *

-__all__ = ["evaluator_base","classification_error_evaluator", "auc_evaluator",
-           "pnpair_evaluator", "precision_recall_evaluator",
-           "ctc_error_evaluator", "chunk_evaluator", "sum_evaluator",
-           "column_sum_evaluator", "value_printer_evaluator",
-           "gradient_printer_evaluator", "maxid_printer_evaluator",
-           "maxframe_printer_evaluator", "seqtext_printer_evaluator",
-           "classification_error_printer_evaluator"]
+__all__ = [
+    "evaluator_base", "classification_error_evaluator", "auc_evaluator",
+    "pnpair_evaluator", "precision_recall_evaluator", "ctc_error_evaluator",
+    "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
+    "value_printer_evaluator", "gradient_printer_evaluator",
+    "maxid_printer_evaluator", "maxframe_printer_evaluator",
+    "seqtext_printer_evaluator", "classification_error_printer_evaluator"
+]


 class EvaluatorAttribute(object):
@@ -32,10 +33,7 @@ class EvaluatorAttribute(object):
     FOR_UTILS = 1 << 4

     KEYS = [
-        "for_classification",
-        "for_regression",
-        "for_rank",
-        "for_print",
+        "for_classification", "for_regression", "for_rank", "for_print",
         "for_utils"
     ]
@@ -55,10 +53,11 @@ def evaluator(*attrs):
             setattr(method, EvaluatorAttribute.to_key(attr), True)
         method.is_evaluator = True
         return method
+
     return impl


-def evaluator_base(
-        input,
+def evaluator_base(input,
                    type,
                    label=None,
                    weight=None,
@@ -130,10 +129,10 @@ def evaluator_base(
         result_file=result_file,
         delimited=delimited)


 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
 @wrap_name_default()
-def classification_error_evaluator(
-    input,
+def classification_error_evaluator(input,
                                    label,
                                    name=None,
                                    weight=None,
@@ -170,13 +169,14 @@ def classification_error_evaluator(
     :return: None.
     """

-    evaluator_base(name=name,
-                   type="classification_error",
-                   input=input,
-                   label=label,
-                   weight=weight,
-                   classification_threshold=threshold,
-                   )
+    evaluator_base(
+        name=name,
+        type="classification_error",
+        input=input,
+        label=label,
+        weight=weight,
+        classification_threshold=threshold, )


 @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
 @wrap_name_default()
@@ -184,8 +184,7 @@ def auc_evaluator(
         input,
         label,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
Auc Evaluator which adapts to binary classification. Auc Evaluator which adapts to binary classification.
...@@ -205,12 +204,14 @@ def auc_evaluator( ...@@ -205,12 +204,14 @@ def auc_evaluator(
[sample_num, 1]. [sample_num, 1].
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="last-column-auc", type="last-column-auc",
input=input, input=input,
label=label, label=label,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_RANK) @evaluator(EvaluatorAttribute.FOR_RANK)
@wrap_name_default() @wrap_name_default()
def pnpair_evaluator( def pnpair_evaluator(
@@ -218,8 +219,7 @@ def pnpair_evaluator(
         label,
         info,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
Positive-negative pair rate Evaluator which adapts to rank task like Positive-negative pair rate Evaluator which adapts to rank task like
learning to rank. This evaluator must contain at least three layers. learning to rank. This evaluator must contain at least three layers.
...@@ -242,13 +242,15 @@ def pnpair_evaluator( ...@@ -242,13 +242,15 @@ def pnpair_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="pnpair", type="pnpair",
input=input, input=input,
label=label, label=label,
info=info, info=info,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def precision_recall_evaluator( def precision_recall_evaluator(
@@ -256,8 +258,7 @@ def precision_recall_evaluator(
         label,
         positive_label=None,
         weight=None,
-        name=None,
-        ):
+        name=None, ):
""" """
An Evaluator to calculate precision and recall, F1-score. An Evaluator to calculate precision and recall, F1-score.
It is adapt to the task with multiple labels. It is adapt to the task with multiple labels.
...@@ -286,20 +287,21 @@ def precision_recall_evaluator( ...@@ -286,20 +287,21 @@ def precision_recall_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="precision_recall", type="precision_recall",
input=input, input=input,
label=label, label=label,
positive_label=positive_label, positive_label=positive_label,
weight=weight) weight=weight)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
def ctc_error_evaluator( def ctc_error_evaluator(
input, input,
label, label,
name=None, name=None, ):
):
""" """
This evaluator is to calculate sequence-to-sequence edit distance. This evaluator is to calculate sequence-to-sequence edit distance.
...@@ -317,10 +319,9 @@ def ctc_error_evaluator( ...@@ -317,10 +319,9 @@ def ctc_error_evaluator(
label for ctc_layer label for ctc_layer
:type label: LayerOutput :type label: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="ctc_edit_distance", name=name, type="ctc_edit_distance", input=input, label=label)
input=input,
label=label)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
@wrap_name_default() @wrap_name_default()
@@ -328,8 +329,7 @@ def chunk_evaluator(
         input,
         name=None,
         chunk_scheme=None,
-        num_chunk_types=None,
-        ):
+        num_chunk_types=None, ):
     """
     Chunk evaluator is used to evaluate segment labelling accuracy for a
     sequence. It calculates the chunk detection F1 score.
@@ -375,19 +375,20 @@ def chunk_evaluator(
     :type chunk_scheme: basestring
     :param num_chunk_types: number of chunk types other than "other"
     """
-    evaluator_base(name=name,
-                   type="chunk",
-                   input=input,
-                   chunk_scheme=chunk_scheme,
-                   num_chunk_types=num_chunk_types)
+    evaluator_base(
+        name=name,
+        type="chunk",
+        input=input,
+        chunk_scheme=chunk_scheme,
+        num_chunk_types=num_chunk_types)
+

 @evaluator(EvaluatorAttribute.FOR_UTILS)
 @wrap_name_default()
 def sum_evaluator(
         input,
         name=None,
-        weight=None,
-        ):
+        weight=None, ):
""" """
An Evaluator to sum the result of input. An Evaluator to sum the result of input.
...@@ -405,18 +406,15 @@ def sum_evaluator( ...@@ -405,18 +406,15 @@ def sum_evaluator(
[sample_num, 1]. (TODO, explaination) [sample_num, 1]. (TODO, explaination)
:type weight: LayerOutput :type weight: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(name=name, type="sum", input=input, weight=weight)
type="sum",
input=input,
weight=weight)
@evaluator(EvaluatorAttribute.FOR_UTILS) @evaluator(EvaluatorAttribute.FOR_UTILS)
@wrap_name_default() @wrap_name_default()
def column_sum_evaluator( def column_sum_evaluator(
input, input,
name=None, name=None,
weight=None, weight=None, ):
):
""" """
This Evaluator is used to sum the last column of input. This Evaluator is used to sum the last column of input.
...@@ -431,22 +429,22 @@ def column_sum_evaluator( ...@@ -431,22 +429,22 @@ def column_sum_evaluator(
:param input: Input Layer name. :param input: Input Layer name.
:type input: LayerOutput :type input: LayerOutput
""" """
evaluator_base(name=name, evaluator_base(
type="last-column-sum", name=name, type="last-column-sum", input=input, weight=weight)
input=input,
weight=weight)
""" """
The following are printer Evaluators which are usually used to The following are printer Evaluators which are usually used to
print the result, like value or gradient of input layers, the print the result, like value or gradient of input layers, the
results generated in machine translation, the classification error etc. results generated in machine translation, the classification error etc.
""" """
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def value_printer_evaluator( def value_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the values of input layers. It contains This Evaluator is used to print the values of input layers. It contains
one or more input layers. one or more input layers.
...@@ -462,16 +460,14 @@ def value_printer_evaluator( ...@@ -462,16 +460,14 @@ def value_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="value_printer", input=input)
type="value_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def gradient_printer_evaluator( def gradient_printer_evaluator(
input, input,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the gradient of input layers. It contains This Evaluator is used to print the gradient of input layers. It contains
one or more input layers. one or more input layers.
...@@ -487,17 +483,15 @@ def gradient_printer_evaluator( ...@@ -487,17 +483,15 @@ def gradient_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(name=name, type="gradient_printer", input=input)
type="gradient_printer",
input=input)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxid_printer_evaluator( def maxid_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print maximum top k values and their indexes This Evaluator is used to print maximum top k values and their indexes
of each row of input layers. It contains one or more input layers. of each row of input layers. It contains one or more input layers.
...@@ -517,18 +511,16 @@ def maxid_printer_evaluator( ...@@ -517,18 +511,16 @@ def maxid_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
type="max_id_printer", name=name, type="max_id_printer", input=input, num_results=num_results)
input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxframe_printer_evaluator( def maxframe_printer_evaluator(
input, input,
num_results=None, num_results=None,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the top k frames of each input layers. This Evaluator is used to print the top k frames of each input layers.
The input layers should contain sequences info or sequences type. The input layers should contain sequences info or sequences type.
...@@ -549,11 +541,13 @@ def maxframe_printer_evaluator( ...@@ -549,11 +541,13 @@ def maxframe_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="max_frame_printer", type="max_frame_printer",
input=input, input=input,
num_results=num_results) num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def seqtext_printer_evaluator( def seqtext_printer_evaluator(
...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator( ...@@ -562,8 +556,7 @@ def seqtext_printer_evaluator(
id_input=None, id_input=None,
dict_file=None, dict_file=None,
delimited=None, delimited=None,
name=None, name=None, ):
):
""" """
Sequence text printer will print text according to index matrix and a Sequence text printer will print text according to index matrix and a
dictionary. There can be multiple input to this layer: dictionary. There can be multiple input to this layer:
...@@ -636,21 +629,22 @@ def seqtext_printer_evaluator( ...@@ -636,21 +629,22 @@ def seqtext_printer_evaluator(
inputs = [id_input, input] inputs = [id_input, input]
input.parents.append(id_input) input.parents.append(id_input)
evaluator_base(name=name, evaluator_base(
name=name,
type="seq_text_printer", type="seq_text_printer",
input=inputs, input=inputs,
dict_file=dict_file, dict_file=dict_file,
result_file=result_file, result_file=result_file,
delimited=delimited) delimited=delimited)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def classification_error_printer_evaluator( def classification_error_printer_evaluator(
input, input,
label, label,
threshold=0.5, threshold=0.5,
name=None, name=None, ):
):
""" """
This Evaluator is used to print the classification error of each sample. This Evaluator is used to print the classification error of each sample.
...@@ -667,7 +661,8 @@ def classification_error_printer_evaluator( ...@@ -667,7 +661,8 @@ def classification_error_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
evaluator_base(name=name, evaluator_base(
name=name,
type="classification_error_printer", type="classification_error_printer",
input=input, input=input,
label=label, label=label,
......
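For orientation, a sketch of how these evaluators are attached in a trainer config; the network itself is illustrative:

from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=100)
label = data_layer(name='label', size=10)
output = fc_layer(input=data, size=10, act=SoftmaxActivation())

# Evaluators register themselves with the config as a side effect of the call.
classification_error_evaluator(input=output, label=label)
outputs(classification_cost(input=output, label=label))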
@@ -21,16 +21,18 @@ from paddle.trainer.config_parser import logger

 __all__ = []


 def register_unary_math_op(op_name, act):
+
     def op(input, name=None):
-        return mixed_layer(input=[identity_projection(input=input)],
-                           name=name,
-                           act=act)
+        return mixed_layer(
+            input=[identity_projection(input=input)], name=name, act=act)

     op = wrap_name_default(op_name)(op)
     op.__doc__ = type(act).__doc__
     globals()[op_name] = op
     __all__.append(op_name)


 register_unary_math_op('exp', act.ExpActivation())
 register_unary_math_op('log', act.LogActivation())
 register_unary_math_op('abs', act.AbsActivation())
@@ -38,6 +40,7 @@ register_unary_math_op('sigmoid', act.SigmoidActivation())
 register_unary_math_op('tanh', act.TanhActivation())
 register_unary_math_op('square', act.SquareActivation())

+
 def add(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, intercept=other)
@@ -45,8 +48,10 @@ def add(layeroutput, other):
         logger.fatal("LayerOutput can only be added with"
                      " another LayerOutput or a number")
     if layeroutput.size == other.size:
-        return mixed_layer(input=[identity_projection(input=layeroutput),
-                           identity_projection(input=other)])
+        return mixed_layer(input=[
+            identity_projection(input=layeroutput),
+            identity_projection(input=other)
+        ])
     if other.size != 1 and layeroutput.size != 1:
         logger.fatal("Two LayerOutput can be added only if they have equal size"
                      " or one of their sizes is 1. sizes are %s and %s" %
@@ -56,12 +61,15 @@ def add(layeroutput, other):
         layeroutput = other
         other = tmp
     other = repeat_layer(other, layeroutput.size)
-    return mixed_layer(input=[identity_projection(input=layeroutput),
-                       identity_projection(input=other)])
+    return mixed_layer(input=[
+        identity_projection(input=layeroutput), identity_projection(input=other)
+    ])
+

 LayerOutput.__radd__ = add
 LayerOutput.__add__ = add
 def sub(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, intercept=other)
@@ -71,14 +79,18 @@ def sub(layeroutput, other):
     neg = slope_intercept_layer(input=other, slope=-1.0)
     return add(layeroutput, neg)

+
 LayerOutput.__sub__ = sub


 def rsub(layeroutput, other):
     neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
     return add(neg, other)

+
 LayerOutput.__rsub__ = rsub


 def mul(layeroutput, other):
     if is_compatible_with(other, float):
         return slope_intercept_layer(input=layeroutput, slope=other)
@@ -93,5 +105,6 @@ def mul(layeroutput, other):
     logger.fatal("At least one of the operand of '*' must be a number"
                  " or a LayerOutput with size=1")

+
 LayerOutput.__mul__ = mul
 LayerOutput.__rmul__ = mul
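The overloads above let LayerOutput values be combined with plain Python arithmetic, as the math test config later in this diff exercises; a compact sketch:

from paddle.trainer_config_helpers import *
from paddle.trainer_config_helpers import math

x = data_layer(name='data', size=100)
y = x + 1        # lowered to slope_intercept_layer with intercept=1
y = 2 * y        # lowered to slope_intercept_layer with slope=2
y = math.exp(y)  # registered unary op: identity projection + ExpActivation
outputs(y)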
@@ -17,11 +17,12 @@ from paddle.trainer.config_parser import Settings, default_decay_rate, \

 from .default_decorators import wrap_param_default

-__all__ = ['Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer',
-           'AdamaxOptimizer', 'AdamOptimizer', 'AdaGradOptimizer',
-           'RMSPropOptimizer', 'DecayedAdaGradOptimizer',
-           'AdaDeltaOptimizer', 'BaseRegularization', 'L2Regularization',
-           'settings', 'ModelAverage']
+__all__ = [
+    'Optimizer', 'BaseSGDOptimizer', 'MomentumOptimizer', 'AdamaxOptimizer',
+    'AdamOptimizer', 'AdaGradOptimizer', 'RMSPropOptimizer',
+    'DecayedAdaGradOptimizer', 'AdaDeltaOptimizer', 'BaseRegularization',
+    'L2Regularization', 'settings', 'ModelAverage'
+]


 class Optimizer(object):
@@ -90,18 +91,15 @@ class MomentumOptimizer(BaseSGDOptimizer):
     :param sparse: with sparse support or not.
     :type sparse: bool
     """

     def extra_settings(self):
         default_momentum(self.momentum)

     def to_setting_kwargs(self):
         if self.sparse:
-            return {
-                'learning_method': 'sparse_momentum'
-            }
+            return {'learning_method': 'sparse_momentum'}
         else:
-            return {
-                'learning_method': 'momentum'
-            }
+            return {'learning_method': 'momentum'}

     def __init__(self, momentum=None, sparse=False):
         self.momentum = momentum
""" """
def to_setting_kwargs(self): def to_setting_kwargs(self):
return { return {'learning_method': 'adagrad'}
'learning_method': 'adagrad'
}
def __init__(self): def __init__(self):
pass pass
@@ -311,9 +307,7 @@ class L2Regularization(BaseRegularization):

     def to_setting_kwargs(self):
         if self.algorithm == 'owlqn':
-            return {
-                'l2weight': self.decay_rate
-            }
+            return {'l2weight': self.decay_rate}
         else:
             return dict()
@@ -330,7 +324,8 @@ class ModelAverage(Optimizer):
             'do_average_in_cpu': self.do_average_in_cpu
         }

-    def __init__(self, average_window,
+    def __init__(self,
+                 average_window,
                  max_average_window=None,
                  do_average_in_cpu=False):
         self.average_window = average_window
@@ -356,10 +351,10 @@ def __extends__(dict1, dict2):
     return dict1


-@wrap_param_default(['learning_method'],
-                    default_factory=lambda _: MomentumOptimizer())
-@wrap_param_default(['regularization'],
-                    default_factory=lambda _: BaseRegularization())
+@wrap_param_default(
+    ['learning_method'], default_factory=lambda _: MomentumOptimizer())
+@wrap_param_default(
+    ['regularization'], default_factory=lambda _: BaseRegularization())
 def settings(batch_size,
              learning_rate=1e-3,
              learning_rate_decay_a=0.,
@@ -373,8 +368,7 @@ def settings(batch_size,
              regularization=None,
              is_async=False,
              model_average=None,
-             gradient_clipping_threshold=None
-             ):
+             gradient_clipping_threshold=None):
""" """
Set the optimization method, learning rate, batch size, and other training Set the optimization method, learning rate, batch size, and other training
settings. The currently supported algorithms are SGD and Async-SGD. settings. The currently supported algorithms are SGD and Async-SGD.
...@@ -415,10 +409,11 @@ def settings(batch_size, ...@@ -415,10 +409,11 @@ def settings(batch_size,
else: else:
algorithm = 'owlqn' algorithm = 'owlqn'
args=['batch_size', 'learning_rate', 'learning_rate_decay_a', args = [
'learning_rate_decay_b', 'learning_rate_schedule', 'batch_size', 'learning_rate', 'learning_rate_decay_a',
'learning_rate_args', 'average_window', 'do_average_in_cpu', 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
'max_average_window'] 'average_window', 'do_average_in_cpu', 'max_average_window'
]
kwargs = dict() kwargs = dict()
kwargs['algorithm'] = algorithm kwargs['algorithm'] = algorithm
for arg in args: for arg in args:
......
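A sketch of a settings() call that overrides the decorated defaults above; the numeric values are arbitrary:

from paddle.trainer_config_helpers import *

settings(
    batch_size=128,
    learning_rate=1e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4))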
@@ -11,18 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 """

 __all__ = [
-    "BasePoolingType",
-    "MaxPooling",
-    "AvgPooling",
-    "CudnnMaxPooling",
-    "CudnnAvgPooling",
-    "SumPooling",
-    "SquareRootNPooling"
+    "BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling",
+    "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
 ]
@@ -36,6 +30,7 @@ class BasePoolingType(object):
     :type name: basestring
     """
+
     def __init__(self, name):
         self.name = name
@@ -54,6 +49,7 @@ class MaxPooling(BasePoolingType):
                              value.  None means use default value in proto.
     :type output_max_index: bool|None
     """
+
     def __init__(self, output_max_index=None):
         BasePoolingType.__init__(self, "max")
         self.output_max_index = output_max_index
@@ -64,6 +60,7 @@ class CudnnMaxPooling(BasePoolingType):
     Cudnn max pooling only support GPU. Return the maxinum value in the
     pooling window.
     """
+
     def __init__(self):
         BasePoolingType.__init__(self, "cudnn-max-pool")
@@ -73,9 +70,11 @@ class CudnnAvgPooling(BasePoolingType):
     Cudnn average pooling only support GPU. Return the average value in the
     pooling window.
     """
+
     def __init__(self):
         BasePoolingType.__init__(self, "cudnn-avg-pool")


 class AvgPooling(BasePoolingType):
     """
     Average pooling.
@@ -105,7 +104,9 @@ class SumPooling(AvgPooling):
     sum(samples\\_of\\_a\\_sequence)
     """

-    def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)
+    def __init__(self):
+        AvgPooling.__init__(self, AvgPooling.STRATEGY_SUM)


 class SquareRootNPooling(AvgPooling):
@@ -118,4 +119,6 @@ class SquareRootNPooling(AvgPooling):
     sum(samples\\_of\\_a\\_sequence)/sqrt(sample\\_num)
     """

-    def __init__(self): AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
+    def __init__(self):
+        AvgPooling.__init__(self, AvgPooling.STRATEGY_SQROOTN)
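These pooling types are passed to pooling layers much like activations; a short sketch (the sequence-pooling wiring is illustrative):

from paddle.trainer_config_helpers import *

seq = data_layer(name='seq', size=100)
seq_avg = pooling_layer(input=seq, pooling_type=AvgPooling())
seq_max = pooling_layer(input=seq, pooling_type=MaxPooling())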
 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-3,
-    batch_size=1000
-)
+settings(learning_rate=1e-3, batch_size=1000)

-img = data_layer(name='image', size=256*256)
+img = data_layer(name='image', size=256 * 256)

 # the parse_conv in config_parse.py is not strictly accurate when filter_size
 # is not square. So here set square filter_size.
-img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
-                          filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
-                          act=LinearActivation())
+img_conv = img_conv_layer(
+    input=img,
+    num_channels=1,
+    num_filters=64,
+    filter_size=(32, 32),
+    padding=(1, 1),
+    stride=(1, 1),
+    act=LinearActivation())
 img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
@@ -18,5 +20,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
 img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

 outputs(img_pool, img_norm)
-
 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-3,
-    batch_size=1000
-)
+settings(learning_rate=1e-3, batch_size=1000)

-img = data_layer(name='image', size=227*227)
+img = data_layer(name='image', size=227 * 227)

 # the parse_conv in config_parse.py is not strictly accurate when filter_size
 # is not square. So here set square filter_size.
-img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
-                          filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
-                          act=LinearActivation(), trans=True)
+img_conv = img_conv_layer(
+    input=img,
+    num_channels=1,
+    num_filters=64,
+    filter_size=(32, 32),
+    padding=(1, 1),
+    stride=(1, 1),
+    act=LinearActivation(),
+    trans=True)
 img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())

 img_norm = img_cmrnorm_layer(input=img_bn, size=32)
 img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())

 outputs(img_pool, img_norm)
 from paddle.trainer_config_helpers import *

-settings(
-    batch_size=1000,
-    learning_rate=1e-5
-)
+settings(batch_size=1000, learning_rate=1e-5)

 din = data_layer(name='data', size=30)

-seq_op = [
-    first_seq,
-    last_seq
-]
+seq_op = [first_seq, last_seq]

-agg_level = [
-    AggregateLevel.EACH_SEQUENCE,
-    AggregateLevel.EACH_TIMESTEP
-]
+agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP]

 opts = []
...
@@ -4,18 +4,18 @@ Test all activations.

 from paddle.trainer_config_helpers import *

-settings(
-    learning_rate=1e-4,
-    batch_size=1000
-)
+settings(learning_rate=1e-4, batch_size=1000)

 din = data_layer(name='input', size=100)

 acts = [
     TanhActivation, SigmoidActivation, SoftmaxActivation, IdentityActivation,
     LinearActivation, ExpActivation, ReluActivation, BReluActivation,
-    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation]
+    SoftReluActivation, STanhActivation, AbsActivation, SquareActivation
+]

-outputs(
-    [fc_layer(input=din, size=100, act=act(), name="layer_%d" % i) for i, act in
-     enumerate(acts)])
+outputs([
+    fc_layer(
+        input=din, size=100, act=act(), name="layer_%d" % i)
+    for i, act in enumerate(acts)
+])
 from paddle.trainer_config_helpers import *
 from paddle.trainer_config_helpers import math

-settings(
-    batch_size=1000,
-    learning_rate=1e-5
-)
+settings(batch_size=1000, learning_rate=1e-5)

 x = data_layer(name='data', size=100)
 x = math.exp(x)
@@ -21,10 +18,9 @@ y = y - 2
 y = 2 - y
 y = 2 * y
 y = y * 3
-z= data_layer(name='data_2', size=1)
+z = data_layer(name='data_2', size=1)
 y = y * z
 y = z * y
 y = y + z
 y = z + y
 outputs(y)
...@@ -3,10 +3,7 @@ Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='test', size=100)
...@@ -30,18 +27,20 @@ with mixed_layer() as m5:
with mixed_layer() as m6:
    m6 += dotmul_operator(a=m3, b=m4)
img = data_layer(name='img', size=32 * 32)
flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
with mixed_layer() as m7:
    m7 += conv_operator(
        img=img, filter=flt, num_filters=64, num_channels=1, filter_size=3)
end = mixed_layer(
    input=[
        full_matrix_projection(input=m5),
        trans_full_matrix_projection(input=m6), full_matrix_projection(input=m7)
    ],
    size=100,
    layer_attr=ExtraAttr(
        drop_rate=0.5, error_clipping_threshold=40))
outputs(end)
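mixed_layer sums the projections it is given, so the block-form end above could equally be written in the incremental += style used for m6 and m7; a sketch, equivalent under that assumption (end2 is illustrative):

with mixed_layer(
        size=100,
        layer_attr=ExtraAttr(
            drop_rate=0.5, error_clipping_threshold=40)) as end2:
    end2 += full_matrix_projection(input=m5)
    end2 += trans_full_matrix_projection(input=m6)
    end2 += full_matrix_projection(input=m7)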
from paddle.trainer_config_helpers import *
settings(learning_rate=1e-4, batch_size=1000)
a = data_layer(name='feature_a', size=200)
b = data_layer(name='feature_b', size=200)
...@@ -11,12 +8,22 @@ b = data_layer(name='feature_b', size=200)
fc_param = ParamAttr(name='fc_param', initial_max=1.0, initial_min=-1.0)
bias_param = ParamAttr(name='bias_param', initial_mean=0.0, initial_std=0.0)
softmax_param = ParamAttr(
    name='softmax_param', initial_max=1.0, initial_min=-1.0)
hidden_a = fc_layer(
    input=a, size=200, param_attr=fc_param, bias_attr=bias_param)
hidden_b = fc_layer(
    input=b, size=200, param_attr=fc_param, bias_attr=bias_param)
predict = fc_layer(
    input=[hidden_a, hidden_b],
    param_attr=[softmax_param, softmax_param],
    bias_attr=False,
    size=10,
    act=SoftmaxActivation())
outputs(
    classification_cost(
        input=predict, label=data_layer(
            name='label', size=10)))
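The weight sharing hinges on the ParamAttr names: both hidden layers reference 'fc_param' and 'bias_param', so they train one 200x200 matrix and one bias vector, and the two softmax inputs share 'softmax_param' the same way. A minimal sketch extending the idiom to a third input (the 'feature_c' layer is hypothetical):

c = data_layer(name='feature_c', size=200)  # hypothetical third input
hidden_c = fc_layer(
    input=c, size=200, param_attr=fc_param, bias_attr=bias_param)  # reuses the shared parameters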
...@@ -16,14 +16,26 @@ with mixed_layer(size=400, bias_attr=False) as m2:
lstm_param = ParamAttr(name='lstm_param')
lstm_bias = ParamAttr(name='lstm_bias', initial_mean=0., initial_std=0.)
lstm1 = lstmemory_group(
    input=m1,
    param_attr=lstm_param,
    lstm_bias_attr=lstm_bias,
    mixed_bias_attr=False)
lstm2 = lstmemory_group(
    input=m2,
    param_attr=lstm_param,
    lstm_bias_attr=lstm_bias,
    mixed_bias_attr=False)
softmax_param = ParamAttr(name='softmax_param')
predict = fc_layer(
    input=[last_seq(input=lstm1), last_seq(input=lstm2)],
    size=10,
    param_attr=[softmax_param, softmax_param],
    bias_attr=False,
    act=SoftmaxActivation())
outputs(
    classification_cost(
        input=predict, label=data_layer(
            name='label', size=10)))
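Here the sharing extends to recurrent weights: both lstmemory_group calls name 'lstm_param' and 'lstm_bias', so the two sequences run through one LSTM. last_seq then keeps only each sequence's final state for the classifier; if mean-over-time features were wanted instead, a pooling layer would be the likely substitute. A hedged sketch, assuming pooling_layer and AvgPooling from the same helpers:

avg1 = pooling_layer(input=lstm1, pooling_type=AvgPooling())  # assumed API, mirroring the MaxPooling usage above
avg2 = pooling_layer(input=lstm2, pooling_type=AvgPooling())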
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=200)
...@@ -13,24 +10,28 @@ rnn = recurrent_layer(input=hidden, act=SigmoidActivation())
rnn2 = recurrent_layer(input=hidden, act=SigmoidActivation(), reverse=True)
lstm1_param = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm1 = lstmemory(input=lstm1_param, act=SigmoidActivation())
lstm2_param = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm2 = lstmemory(input=lstm2_param, act=SigmoidActivation(), reverse=True)
gru1_param = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru1 = grumemory(input=gru1_param, act=SigmoidActivation())
gru2_param = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru2 = grumemory(input=gru2_param, act=SigmoidActivation(), reverse=True)
outputs(
    last_seq(input=rnn),
    first_seq(input=rnn2),
    last_seq(input=lstm1),
    first_seq(input=lstm2),
    last_seq(input=gru1),
    first_seq(gru2))
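The 200 * 4 and 200 * 3 projections are not arbitrary: lstmemory consumes four gate pre-activations per hidden unit and grumemory three, so the fc_layer feeding each memory must be sized accordingly. To consume a forward/backward pair jointly rather than emitting the directions separately, the two endpoints could be concatenated; a sketch assuming concat_layer from the same helpers:

bi_lstm = concat_layer(input=[last_seq(input=lstm1), first_seq(input=lstm2)])  # assumed helper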
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-4)
din = data_layer(name='data', size=120)
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2304)
conv = img_conv_layer(
    input=data,
    filter_size=3,
    num_channels=1,
    num_filters=16,
    padding=1,
    act=LinearActivation(),
    bias_attr=True)
bilinear = bilinear_interp_layer(input=conv, out_size_x=64, out_size_y=64)
pool = img_pool_layer(
    input=bilinear,
    num_channels=4,
    pool_size=2,
    stride=2,
......
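For size bookkeeping: data size 2304 is 48 * 48 with one channel; the 3x3 convolution with padding=1 (and, assuming the default stride of 1) preserves 48x48, bilinear_interp_layer rescales that to 64x64, and the 2x2 stride-2 pool, whose call is truncated above, would halve it to 32x32. The arithmetic, using the usual output-size formula out = (in + 2*padding - filter) / stride + 1:

def conv_out(in_size, filter_size, padding, stride):
    # standard convolution/pooling output-size formula
    return (in_size + 2 * padding - filter_size) // stride + 1

assert conv_out(48, 3, 1, 1) == 48  # the conv above preserves spatial size
assert conv_out(64, 2, 0, 2) == 32  # the 2x2/stride-2 pool halves 64x64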