diff --git a/.travis.yml b/.travis.yml
index cf0cca113471ec81f9428346f16fde28bcfee31a..7de4ec7fc511832998cd0dc053645e52136042b8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,10 +8,13 @@ os:
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
+ - JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
- env: JOB=DOCS # Only generate documentation in linux
+ env: JOB=DOCS # Only generate documentation in linux.
+ - os: osx
+    env: JOB=PRE_COMMIT # Only check pre-commit hooks in linux.
addons:
apt:
@@ -39,18 +42,23 @@ addons:
- lcov
- graphviz
- swig
+ - clang-format-3.8
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
- if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
- then
- echo "Only markdown docs were updated, stopping build process."
- exit
+      change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
+      if [ $? -eq 0 ]; then # if git diff returned non-zero, skip this check and run the full build.
+ if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
+ then
+ echo "Only markdown docs were updated, stopping build process."
+ exit
+ fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme
+ - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
+ - pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script:
- paddle/scripts/travis/main.sh
notifications:
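
As a rough illustration of the before_install logic above, here is the same doc-only-change check as a small Python 3 sketch. The file-extension pattern and the log message are taken from the script itself; the function name and the use of subprocess are illustrative, not part of the repo.

```
# Hedged sketch of the doc-only change check in .travis.yml above.
import os
import re
import subprocess
import sys

DOC_ONLY = re.compile(r'\.(md|rst|jpg|png)$')

def only_docs_changed(commit_range):
    # A non-zero exit from git diff (e.g. unknown SHAs after a force
    # push) means we cannot tell what changed, so run the full build.
    result = subprocess.run(['git', 'diff', '--name-only', commit_range],
                            capture_output=True, text=True)
    if result.returncode != 0:
        return False
    changed = result.stdout.split()
    return bool(changed) and all(DOC_ONLY.search(f) for f in changed)

if only_docs_changed(os.environ.get('TRAVIS_COMMIT_RANGE', '')):
    print('Only markdown docs were updated, stopping build process.')
    sys.exit(0)
```
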
diff --git a/WORKSPACE b/WORKSPACE
index 14699da90523c48d80f8ba5917bc7aa7e29e0152..f4358f0195aed8f0ce1321ae2ef935b887619cea 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -1,10 +1,9 @@
# External dependency to Google protobuf.
http_archive(
- name = "protobuf",
- url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
- sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
- strip_prefix = "protobuf-3.1.0",
-)
+ name="protobuf",
+ url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
+ sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
+ strip_prefix="protobuf-3.1.0", )
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
diff --git a/benchmark/tensorflow/rnn/run_multi.sh b/benchmark/tensorflow/rnn/run_multi.sh
index f7f52e01e38d304bb3bf8185c53bd0da26014d3a..c2d7dd597e6da54cd5c4cda311fbbd18486b4647 100755
--- a/benchmark/tensorflow/rnn/run_multi.sh
+++ b/benchmark/tensorflow/rnn/run_multi.sh
@@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128
test 4 2 512 256
test 4 2 512 512
-
diff --git a/demo/gan/README.md b/demo/gan/README.md
index fdc970a07b488c3a4146c9baa76a133a456fc9ab..1908b534b0c1f63904d5503399b961d74ce0037c 100644
--- a/demo/gan/README.md
+++ b/demo/gan/README.md
@@ -10,4 +10,4 @@ Then you can run the command below. The flag -d specifies the training data (cif
$python gan_trainer.py -d cifar --use_gpu 1
The generated images will be stored in ./cifar_samples/
-The corresponding models will be stored in ./cifar_params/
\ No newline at end of file
+The corresponding models will be stored in ./cifar_params/
diff --git a/demo/gan/data/download_cifar.sh b/demo/gan/data/download_cifar.sh
index 32e73b3d8e50ec845c79e4ce93f220583f364360..ae24ef2b7f2012fb719037d4868bdf0e7f9ce71d 100755
--- a/demo/gan/data/download_cifar.sh
+++ b/demo/gan/data/download_cifar.sh
@@ -15,4 +15,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
-
diff --git a/demo/gan/data/get_mnist_data.sh b/demo/gan/data/get_mnist_data.sh
index d21bf7067135f1f8be486ef0f13fc3ec94ffc4ed..a77c81bf5af9ddb6634ff89460797ca543c5e517 100644
--- a/demo/gan/data/get_mnist_data.sh
+++ b/demo/gan/data/get_mnist_data.sh
@@ -15,5 +15,3 @@ do
gunzip ${fname}.gz
fi
done
-
-
diff --git a/demo/gan/gan_conf.py b/demo/gan/gan_conf.py
index 58ba9dde58bafb90a4bd1d76f5d8138e8948dd3a..86ac2dffe5f4490a88e12d1fa5e8cd9fa61a69f4 100644
--- a/demo/gan/gan_conf.py
+++ b/demo/gan/gan_conf.py
@@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
-assert mode in set(["generator",
- "discriminator",
- "generator_training",
- "discriminator_training"])
+assert mode in set([
+ "generator", "discriminator", "generator_training", "discriminator_training"
+])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@@ -38,8 +37,8 @@ sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
- learning_method=AdamOptimizer(beta1=0.5)
-)
+ learning_method=AdamOptimizer(beta1=0.5))
+
def discriminator(sample):
"""
@@ -50,70 +49,87 @@ def discriminator(sample):
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
- bias_attr = ParamAttr(is_static=is_generator_training,
- initial_mean=1.0,
- initial_std=0)
-
- hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=ReluActivation())
-
- hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=LinearActivation())
-
- hidden_bn = batch_norm_layer(hidden2,
- act=ReluActivation(),
- name="dis_hidden_bn",
- bias_attr=bias_attr,
- param_attr=ParamAttr(is_static=is_generator_training,
- initial_mean=1.0,
- initial_std=0.02),
- use_global_stats=False)
-
- return fc_layer(input=hidden_bn, name="dis_prob", size=2,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=SoftmaxActivation())
+ bias_attr = ParamAttr(
+ is_static=is_generator_training, initial_mean=1.0, initial_std=0)
+
+ hidden = fc_layer(
+ input=sample,
+ name="dis_hidden",
+ size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=ReluActivation())
+
+ hidden2 = fc_layer(
+ input=hidden,
+ name="dis_hidden2",
+ size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ hidden_bn = batch_norm_layer(
+ hidden2,
+ act=ReluActivation(),
+ name="dis_hidden_bn",
+ bias_attr=bias_attr,
+ param_attr=ParamAttr(
+ is_static=is_generator_training, initial_mean=1.0,
+ initial_std=0.02),
+ use_global_stats=False)
+
+ return fc_layer(
+ input=hidden_bn,
+ name="dis_prob",
+ size=2,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=SoftmaxActivation())
+
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
- bias_attr = ParamAttr(is_static=is_discriminator_training,
- initial_mean=1.0,
- initial_std=0)
-
- hidden = fc_layer(input=noise,
- name="gen_layer_hidden",
- size=hidden_dim,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=ReluActivation())
-
- hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=LinearActivation())
-
- hidden_bn = batch_norm_layer(hidden2,
- act=ReluActivation(),
- name="gen_layer_hidden_bn",
- bias_attr=bias_attr,
- param_attr=ParamAttr(is_static=is_discriminator_training,
- initial_mean=1.0,
- initial_std=0.02),
- use_global_stats=False)
-
- return fc_layer(input=hidden_bn,
- name="gen_layer1",
- size=sample_dim,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=LinearActivation())
+ bias_attr = ParamAttr(
+ is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
+
+ hidden = fc_layer(
+ input=noise,
+ name="gen_layer_hidden",
+ size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=ReluActivation())
+
+ hidden2 = fc_layer(
+ input=hidden,
+ name="gen_hidden2",
+ size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ hidden_bn = batch_norm_layer(
+ hidden2,
+ act=ReluActivation(),
+ name="gen_layer_hidden_bn",
+ bias_attr=bias_attr,
+ param_attr=ParamAttr(
+ is_static=is_discriminator_training,
+ initial_mean=1.0,
+ initial_std=0.02),
+ use_global_stats=False)
+
+ return fc_layer(
+ input=hidden_bn,
+ name="gen_layer1",
+ size=sample_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
@@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
- classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ classification_error_evaluator(
+ input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:
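
The four modes asserted at the top of this file are selected by the trainer through config arguments. Below is a sketch of how gan_trainer.py (further down in this diff) parses this same config file under different modes; the only assumption is a local PaddlePaddle installation.

```
# Instantiating the three network configs from this one file, mirroring
# the parse_config calls that appear later in this diff.
from paddle.trainer.config_parser import parse_config

data_source = "uniform"  # or "mnist" / "cifar"
conf = "gan_conf.py"
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
dis_conf = parse_config(conf,
                        "mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
```
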
diff --git a/demo/gan/gan_conf_image.py b/demo/gan/gan_conf_image.py
index 5c2b140537418d52760719c7b605e778790cb7a6..f89a4e706c3b7eeaa7858f54f8fa04a5e038b66e 100644
--- a/demo/gan/gan_conf_image.py
+++ b/demo/gan/gan_conf_image.py
@@ -15,10 +15,9 @@ from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
dataSource = get_config_arg("data", str, "mnist")
-assert mode in set(["generator",
- "discriminator",
- "generator_training",
- "discriminator_training"])
+assert mode in set([
+ "generator", "discriminator", "generator_training", "discriminator_training"
+])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@@ -36,24 +35,33 @@ noise_dim = 100
gf_dim = 64
df_dim = 64
if dataSource == "mnist":
- sample_dim = 28 # image dim
- c_dim = 1 # image color
+ sample_dim = 28 # image dim
+ c_dim = 1 # image color
else:
sample_dim = 32
c_dim = 3
-s2, s4 = int(sample_dim/2), int(sample_dim/4),
-s8, s16 = int(sample_dim/8), int(sample_dim/16)
+s2, s4 = int(sample_dim / 2), int(sample_dim / 4),
+s8, s16 = int(sample_dim / 8), int(sample_dim / 16)
settings(
batch_size=128,
learning_rate=2e-4,
- learning_method=AdamOptimizer(beta1=0.5)
-)
+ learning_method=AdamOptimizer(beta1=0.5))
-def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
- param_attr, bias_attr, param_attr_bn, bn, trans=False,
- act=ReluActivation()):
-
+
+def conv_bn(input,
+ channels,
+ imgSize,
+ num_filters,
+ output_x,
+ stride,
+ name,
+ param_attr,
+ bias_attr,
+ param_attr_bn,
+ bn,
+ trans=False,
+ act=ReluActivation()):
"""
conv_bn is a utility function that constructs a convolution/deconv layer
with an optional batch_norm layer
@@ -63,10 +71,10 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
:param trans: whether to use conv (False) or deconv (True)
:type trans: bool
"""
-
+
# calculate the filter_size and padding size based on the given
# imgSize and ouput size
- tmp = imgSize - (output_x - 1) * stride
+ tmp = imgSize - (output_x - 1) * stride
if tmp <= 1 or tmp > 5:
raise ValueError("conv input-output dimension does not fit")
elif tmp <= 3:
@@ -76,111 +84,134 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
filter_size = tmp
padding = 0
- print (imgSize, output_x, stride, filter_size, padding)
-
+ print(imgSize, output_x, stride, filter_size, padding)
+
if trans:
nameApx = "_conv"
else:
nameApx = "_convt"
-
+
if bn:
- conv = img_conv_layer(input, filter_size=filter_size,
- num_filters=num_filters,
- name=name + nameApx, num_channels=channels,
- act=LinearActivation(), groups=1, stride=stride,
- padding=padding, bias_attr=bias_attr,
- param_attr=param_attr, shared_biases=True, layer_attr=None,
- filter_size_y=None, stride_y=None, padding_y=None,
- trans=trans)
-
- conv_bn = batch_norm_layer(conv,
- act=act,
- name=name + nameApx + "_bn",
- bias_attr=bias_attr,
- param_attr=param_attr_bn,
- use_global_stats=False)
-
+ conv = img_conv_layer(
+ input,
+ filter_size=filter_size,
+ num_filters=num_filters,
+ name=name + nameApx,
+ num_channels=channels,
+ act=LinearActivation(),
+ groups=1,
+ stride=stride,
+ padding=padding,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ shared_biases=True,
+ layer_attr=None,
+ filter_size_y=None,
+ stride_y=None,
+ padding_y=None,
+ trans=trans)
+
+ conv_bn = batch_norm_layer(
+ conv,
+ act=act,
+ name=name + nameApx + "_bn",
+ bias_attr=bias_attr,
+ param_attr=param_attr_bn,
+ use_global_stats=False)
+
return conv_bn
else:
- conv = img_conv_layer(input, filter_size=filter_size,
- num_filters=num_filters,
- name=name + nameApx, num_channels=channels,
- act=act, groups=1, stride=stride,
- padding=padding, bias_attr=bias_attr,
- param_attr=param_attr, shared_biases=True, layer_attr=None,
- filter_size_y=None, stride_y=None, padding_y=None,
- trans=trans)
+ conv = img_conv_layer(
+ input,
+ filter_size=filter_size,
+ num_filters=num_filters,
+ name=name + nameApx,
+ num_channels=channels,
+ act=act,
+ groups=1,
+ stride=stride,
+ padding=padding,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ shared_biases=True,
+ layer_attr=None,
+ filter_size_y=None,
+ stride_y=None,
+ padding_y=None,
+ trans=trans)
return conv
-
+
+
def generator(noise):
"""
generator generates a sample given noise
"""
- param_attr = ParamAttr(is_static=is_discriminator_training,
- initial_mean=0.0,
- initial_std=0.02)
- bias_attr = ParamAttr(is_static=is_discriminator_training,
- initial_mean=0.0,
- initial_std=0.0)
-
- param_attr_bn=ParamAttr(is_static=is_discriminator_training,
- initial_mean=1.0,
- initial_std=0.02)
-
- h1 = fc_layer(input=noise,
- name="gen_layer_h1",
- size=s8 * s8 * gf_dim * 4,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=LinearActivation())
-
- h1_bn = batch_norm_layer(h1,
- act=ReluActivation(),
- name="gen_layer_h1_bn",
- bias_attr=bias_attr,
- param_attr=param_attr_bn,
- use_global_stats=False)
-
- h2_bn = conv_bn(h1_bn,
- channels=gf_dim*4,
- output_x=s8,
- num_filters=gf_dim*2,
- imgSize=s4,
- stride=2,
- name="gen_layer_h2",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=True,
- trans=True)
-
- h3_bn = conv_bn(h2_bn,
- channels=gf_dim*2,
- output_x=s4,
- num_filters=gf_dim,
- imgSize=s2,
- stride=2,
- name="gen_layer_h3",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=True,
- trans=True)
-
-
- return conv_bn(h3_bn,
- channels=gf_dim,
- output_x=s2,
- num_filters=c_dim,
- imgSize=sample_dim,
- stride=2,
- name="gen_layer_h4",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=False,
- trans=True,
- act=TanhActivation())
+ param_attr = ParamAttr(
+ is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.02)
+ bias_attr = ParamAttr(
+ is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.0)
+
+ param_attr_bn = ParamAttr(
+ is_static=is_discriminator_training, initial_mean=1.0, initial_std=0.02)
+
+ h1 = fc_layer(
+ input=noise,
+ name="gen_layer_h1",
+ size=s8 * s8 * gf_dim * 4,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ h1_bn = batch_norm_layer(
+ h1,
+ act=ReluActivation(),
+ name="gen_layer_h1_bn",
+ bias_attr=bias_attr,
+ param_attr=param_attr_bn,
+ use_global_stats=False)
+
+ h2_bn = conv_bn(
+ h1_bn,
+ channels=gf_dim * 4,
+ output_x=s8,
+ num_filters=gf_dim * 2,
+ imgSize=s4,
+ stride=2,
+ name="gen_layer_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+ h3_bn = conv_bn(
+ h2_bn,
+ channels=gf_dim * 2,
+ output_x=s4,
+ num_filters=gf_dim,
+ imgSize=s2,
+ stride=2,
+ name="gen_layer_h3",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+ return conv_bn(
+ h3_bn,
+ channels=gf_dim,
+ output_x=s2,
+ num_filters=c_dim,
+ imgSize=sample_dim,
+ stride=2,
+ name="gen_layer_h4",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False,
+ trans=True,
+ act=TanhActivation())
def discriminator(sample):
@@ -191,58 +222,60 @@ def discriminator(sample):
of the sample is from generator and dimension 1 is the probabblity
of the sample is from real data.
"""
- param_attr = ParamAttr(is_static=is_generator_training,
- initial_mean=0.0,
- initial_std=0.02)
- bias_attr = ParamAttr(is_static=is_generator_training,
- initial_mean=0.0,
- initial_std=0.0)
-
- param_attr_bn=ParamAttr(is_static=is_generator_training,
- initial_mean=1.0,
- initial_std=0.02)
-
- h0 = conv_bn(sample,
- channels=c_dim,
- imgSize=sample_dim,
- num_filters=df_dim,
- output_x=s2,
- stride=2,
- name="dis_h0",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=False)
-
- h1_bn = conv_bn(h0,
- channels=df_dim,
- imgSize=s2,
- num_filters=df_dim*2,
- output_x=s4,
- stride=2,
- name="dis_h1",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=True)
-
- h2_bn = conv_bn(h1_bn,
- channels=df_dim*2,
- imgSize=s4,
- num_filters=df_dim*4,
- output_x=s8,
- stride=2,
- name="dis_h2",
- param_attr=param_attr,
- bias_attr=bias_attr,
- param_attr_bn=param_attr_bn,
- bn=True)
-
- return fc_layer(input=h2_bn, name="dis_prob", size=2,
- bias_attr=bias_attr,
- param_attr=param_attr,
- act=SoftmaxActivation())
+ param_attr = ParamAttr(
+ is_static=is_generator_training, initial_mean=0.0, initial_std=0.02)
+ bias_attr = ParamAttr(
+ is_static=is_generator_training, initial_mean=0.0, initial_std=0.0)
+
+ param_attr_bn = ParamAttr(
+ is_static=is_generator_training, initial_mean=1.0, initial_std=0.02)
+
+ h0 = conv_bn(
+ sample,
+ channels=c_dim,
+ imgSize=sample_dim,
+ num_filters=df_dim,
+ output_x=s2,
+ stride=2,
+ name="dis_h0",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False)
+
+ h1_bn = conv_bn(
+ h0,
+ channels=df_dim,
+ imgSize=s2,
+ num_filters=df_dim * 2,
+ output_x=s4,
+ stride=2,
+ name="dis_h1",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+
+ h2_bn = conv_bn(
+ h1_bn,
+ channels=df_dim * 2,
+ imgSize=s4,
+ num_filters=df_dim * 4,
+ output_x=s8,
+ stride=2,
+ name="dis_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+ return fc_layer(
+ input=h2_bn,
+ name="dis_prob",
+ size=2,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=SoftmaxActivation())
if is_generator_training:
@@ -250,13 +283,14 @@ if is_generator_training:
sample = generator(noise)
if is_discriminator_training:
- sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim)
+ sample = data_layer(name="sample", size=sample_dim * sample_dim * c_dim)
if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
- classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ classification_error_evaluator(
+ input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:
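
A quick sanity check of the filter-size arithmetic in conv_bn above, using the standard convolution output formula. The numbers are synthetic, chosen so that tmp lands in the else branch shown in this diff (filter_size = tmp, padding = 0); the tmp <= 3 branch is elided from the hunk and is not exercised here.

```
# Verify: with padding = 0 and filter_size = imgSize - (output_x - 1)
# * stride, a strided convolution maps imgSize back to output_x.
def conv_output(img_size, filter_size, stride, padding):
    return (img_size + 2 * padding - filter_size) // stride + 1

img_size, output_x, stride = 10, 4, 2
tmp = img_size - (output_x - 1) * stride  # = 4, within the (1, 5] range
assert conv_output(img_size, tmp, stride, padding=0) == output_x
```
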
diff --git a/demo/gan/gan_trainer.py b/demo/gan/gan_trainer.py
index a8c1bd0414529f48feb23bdb850751782de52c04..4a26c230f7a21cc6dd4a3cdb52e32730b1ce73ca 100644
--- a/demo/gan/gan_trainer.py
+++ b/demo/gan/gan_trainer.py
@@ -16,7 +16,7 @@ import argparse
import random
import numpy
import cPickle
-import sys,os
+import sys, os
from PIL import Image
from paddle.trainer.config_parser import parse_config
@@ -24,6 +24,7 @@ from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import matplotlib.pyplot as plt
+
def plot2DScatter(data, outputfile):
'''
Plot the data as a 2D scatter plot and save to outputfile
@@ -41,9 +42,11 @@ def plot2DScatter(data, outputfile):
plt.scatter(x, y)
plt.savefig(outputfile, bbox_inches='tight')
+
def CHECK_EQ(a, b):
assert a == b, "a=%s, b=%s" % (a, b)
+
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
@@ -52,11 +55,9 @@ def copy_shared_parameters(src, dst):
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
- src_params = [src.getParameter(i)
- for i in xrange(src.getParameterSize())]
+ src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
-
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
@@ -67,15 +68,17 @@ def copy_shared_parameters(src, dst):
CHECK_EQ(len(src_value), len(dst_value))
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
-
+
+
def print_parameters(src):
- src_params = [src.getParameter(i)
- for i in xrange(src.getParameterSize())]
+ src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
print "***************"
for p in src_params:
print "Name is %s" % p.getName()
- print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
+ print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray(
+ )
+
def load_mnist_data(imageFile):
f = open(imageFile, "rb")
@@ -86,33 +89,36 @@ def load_mnist_data(imageFile):
n = 60000
else:
n = 10000
-
- data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
+
+ data = numpy.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
data = data / 255.0 * 2.0 - 1.0
f.close()
return data.astype('float32')
+
def load_cifar_data(cifar_path):
batch_size = 10000
- data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
+ data = numpy.zeros((5 * batch_size, 32 * 32 * 3), dtype="float32")
for i in range(1, 6):
file = cifar_path + "/data_batch_" + str(i)
fo = open(file, 'rb')
dict = cPickle.load(fo)
fo.close()
- data[(i - 1)*batch_size:(i*batch_size), :] = dict["data"]
-
+ data[(i - 1) * batch_size:(i * batch_size), :] = dict["data"]
+
data = data / 255.0 * 2.0 - 1.0
return data
+
# synthesize 2-D uniform data
def load_uniform_data():
data = numpy.random.rand(1000000, 2).astype('float32')
return data
+
def merge(images, size):
- if images.shape[1] == 28*28:
+ if images.shape[1] == 28 * 28:
h, w, c = 28, 28, 1
else:
h, w, c = 32, 32, 3
@@ -124,6 +130,7 @@ def merge(images, size):
((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
return img.astype('uint8')
+
def save_images(images, path):
merged_img = merge(images, [8, 8])
if merged_img.shape[2] == 1:
@@ -131,14 +138,17 @@ def save_images(images, path):
else:
im = Image.fromarray(merged_img, mode="RGB")
im.save(path)
-
+
+
def get_real_samples(batch_size, data_np):
- return data_np[numpy.random.choice(data_np.shape[0], batch_size,
- replace=False),:]
-
+ return data_np[numpy.random.choice(
+ data_np.shape[0], batch_size, replace=False), :]
+
+
def get_noise(batch_size, noise_dim):
return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
+
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
@@ -147,12 +157,14 @@ def get_fake_samples(generator_machine, batch_size, noise):
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
+
def get_training_loss(training_machine, inputs):
outputs = api.Arguments.createArguments(0)
training_machine.forward(inputs, outputs, api.PASS_TEST)
loss = outputs.getSlotValue(0).copyToNumpyMat()
return numpy.mean(loss)
+
def prepare_discriminator_data_batch_pos(batch_size, data_np):
real_samples = get_real_samples(batch_size, data_np)
labels = numpy.ones(batch_size, dtype='int32')
@@ -161,6 +173,7 @@ def prepare_discriminator_data_batch_pos(batch_size, data_np):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
+
def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
labels = numpy.zeros(batch_size, dtype='int32')
@@ -169,6 +182,7 @@ def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
+
def prepare_generator_data_batch(batch_size, noise):
label = numpy.ones(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
@@ -193,10 +207,9 @@ def get_layer_size(model_conf, layer_name):
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
- parser.add_argument("--use_gpu", default="1",
- help="1 means use gpu for training")
- parser.add_argument("--gpu_id", default="0",
- help="the gpu_id parameter")
+ parser.add_argument(
+ "--use_gpu", default="1", help="1 means use gpu for training")
+ parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
args = parser.parse_args()
data_source = args.data_source
use_gpu = args.use_gpu
@@ -208,30 +221,32 @@ def main():
if not os.path.exists("./%s_params/" % data_source):
os.makedirs("./%s_params/" % data_source)
-
- api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
- '--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
-
+
+ api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
+ '--log_period=100', '--gpu_id=' + args.gpu_id,
+ '--save_dir=' + "./%s_params/" % data_source)
+
if data_source == "uniform":
conf = "gan_conf.py"
num_iter = 10000
else:
conf = "gan_conf_image.py"
num_iter = 1000
-
+
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
- dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
+ dis_conf = parse_config(conf,
+ "mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
batch_size = dis_conf.opt_config.batch_size
noise_dim = get_layer_size(gen_conf.model_config, "noise")
-
+
if data_source == "mnist":
data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
elif data_source == "cifar":
data_np = load_cifar_data("./data/cifar-10-batches-py/")
else:
data_np = load_uniform_data()
-
+
# this creates a gradient machine for discriminator
dis_training_machine = api.GradientMachine.createFromConfigProto(
dis_conf.model_config)
@@ -244,26 +259,24 @@ def main():
logger.info(str(generator_conf.model_config))
generator_machine = api.GradientMachine.createFromConfigProto(
generator_conf.model_config)
-
- dis_trainer = api.Trainer.create(
- dis_conf, dis_training_machine)
- gen_trainer = api.Trainer.create(
- gen_conf, gen_training_machine)
-
+ dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
+
+ gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
+
dis_trainer.startTrain()
gen_trainer.startTrain()
-
+
# Sync parameters between networks (GradientMachine) at the beginning
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
-
+
# constrain that either discriminator or generator can not be trained
# consecutively more than MAX_strike times
curr_train = "dis"
curr_strike = 0
MAX_strike = 5
-
+
for train_pass in xrange(100):
dis_trainer.startTrainPass()
gen_trainer.startTrainPass()
@@ -272,23 +285,25 @@ def main():
noise = get_noise(batch_size, noise_dim)
data_batch_dis_pos = prepare_discriminator_data_batch_pos(
batch_size, data_np)
- dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
-
+ dis_loss_pos = get_training_loss(dis_training_machine,
+ data_batch_dis_pos)
+
data_batch_dis_neg = prepare_discriminator_data_batch_neg(
generator_machine, batch_size, noise)
- dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
-
+ dis_loss_neg = get_training_loss(dis_training_machine,
+ data_batch_dis_neg)
+
dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
-
+
# Do forward pass in generator to get the gen_loss
- data_batch_gen = prepare_generator_data_batch(
- batch_size, noise)
+ data_batch_gen = prepare_generator_data_batch(batch_size, noise)
gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
-
+
if i % 100 == 0:
- print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
+ print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos,
+ dis_loss_neg)
print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
-
+
# Decide which network to train based on the training history
# And the relative size of the loss
if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
@@ -297,11 +312,12 @@ def main():
curr_strike += 1
else:
curr_train = "dis"
- curr_strike = 1
+ curr_strike = 1
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
- dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
- copy_shared_parameters(dis_training_machine, gen_training_machine)
-
+ dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
+ copy_shared_parameters(dis_training_machine,
+ gen_training_machine)
+
else:
if curr_train == "gen":
curr_strike += 1
@@ -311,19 +327,23 @@ def main():
gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
# TODO: add API for paddle to allow true parameter sharing between different GradientMachines
# so that we do not need to copy shared parameters.
- copy_shared_parameters(gen_training_machine, dis_training_machine)
+ copy_shared_parameters(gen_training_machine,
+ dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
-
+
dis_trainer.finishTrainPass()
gen_trainer.finishTrainPass()
# At the end of each pass, save the generated samples/images
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
if data_source == "uniform":
- plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
+ (data_source, train_pass))
else:
- save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ save_images(fake_samples, "./%s_samples/train_pass%s.png" %
+ (data_source, train_pass))
dis_trainer.finishTrain()
gen_trainer.finishTrain()
+
if __name__ == '__main__':
main()
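
The comment in main() says that neither network may be trained more than MAX_strike batches in a row. Part of the selection condition is elided from this hunk, so the helper below is only an illustrative reconstruction of the strike-limiting behavior, not the trainer's exact rule.

```
# Illustrative strike limiter: train the preferred network unless it
# has already been trained MAX_strike consecutive batches.
MAX_strike = 5

def next_trainee(preferred, curr_train, curr_strike):
    if preferred == curr_train and curr_strike >= MAX_strike:
        # Strike limit hit: force a switch to the other network.
        return ("gen" if curr_train == "dis" else "dis"), 1
    if preferred == curr_train:
        return curr_train, curr_strike + 1
    return preferred, 1

print(next_trainee("dis", "dis", 5))  # -> ('gen', 1)
```
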
diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/demo/quick_start/trainer_config.resnet-lstm.py
index 5bed925d84a0a6d94da446e1a8c64061ad54ae55..89a837abb7cdeaaa249160123e1f2001d23d7aa1 100644
--- a/demo/quick_start/trainer_config.resnet-lstm.py
+++ b/demo/quick_start/trainer_config.resnet-lstm.py
@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
@@ -46,11 +45,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -58,10 +58,9 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
@@ -73,17 +72,15 @@ for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
- hidden_state = simple_lstm(input=current_input, size=128,
- lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+ hidden_state = simple_lstm(
+ input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
-
+output = fc_layer(
+ input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py
index 123df022f508cad1d4557b845619dd18761f357e..a02a49a86ed31f44058c192525a2acd979c5de0b 100644
--- a/demo/semantic_role_labeling/data/extract_dict_feature.py
+++ b/demo/semantic_role_labeling/data/extract_dict_feature.py
@@ -33,7 +33,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
-
+
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
@@ -48,7 +48,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
-
+
if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
@@ -69,7 +69,6 @@ def extract_dict_features(pair_file, feature_file):
feature_out.write(feature_str + '\n')
-
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'
diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py
index 2d0d535c53a74a9fbf9ea2521930333b7f89581b..94a8488c16734eb1882d54f7ec36f4b9308c09d4 100644
--- a/demo/semantic_role_labeling/data/extract_pairs.py
+++ b/demo/semantic_role_labeling/data/extract_pairs.py
@@ -66,8 +66,8 @@ def transform_labels(sentences, labels):
else:
verb_list = []
for x in labels[i][0]:
- if x !='-':
- verb_list.append(x)
+ if x != '-':
+ verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
@@ -93,7 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
- sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
+ sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
@@ -103,7 +103,7 @@ def write_file(sen_lab_pair, output_file):
sentence = x[0]
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
- fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
+ fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py
index d12f10bfcb65e25972035d863997bb9d26ba86eb..042cd4e7a9e256cd597ac34eed423040f1d7ccd5 100644
--- a/demo/semantic_role_labeling/dataprovider.py
+++ b/demo/semantic_role_labeling/dataprovider.py
@@ -21,7 +21,7 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
-
+
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
@@ -29,25 +29,28 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(predicate_dict)),
- integer_value_sequence(2),
+ integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
def get_batch_size(yeild_data):
return len(yeild_data[0])
-
-@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
- can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+
+@provider(
+ init_hook=hook,
+ should_shuffle=True,
+ calc_batch_size=get_batch_size,
+ can_over_batch_size=False,
+ cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
-
+
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
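
For reference, process() above consumes nine tab-separated fields per line. The record below is invented for illustration (a 3-word sentence with the verb last); only the field order is taken from the code.

```
# Hypothetical SRL input record and how process() splits it.
line = ("The cat sat\tsat\tThe\tcat\tsat\teos\teos\t"
        "1 1 1\tB-A0 I-A0 B-V")
(sentence, predicate, ctx_n2, ctx_n1, ctx_0,
 ctx_p1, ctx_p2, mark, label) = line.strip().split('\t')

words = sentence.split()                  # ['The', 'cat', 'sat']
assert len(words) == len(label.split())  # one tag per word
```
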
diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py
index 75946bd72e04341c189f6e88fdde98e03f4a8bfb..04e2a559b19bd4b9aec0242eb43edf6ab1e7624e 100644
--- a/demo/semantic_role_labeling/db_lstm.py
+++ b/demo/semantic_role_labeling/db_lstm.py
@@ -20,7 +20,7 @@ from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
-predicate_file= './data/verbDict.txt'
+predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@@ -47,7 +47,6 @@ if not is_predict:
w = line.strip()
predicate_dict[w] = i
-
if is_test:
train_list_file = None
@@ -57,9 +56,11 @@ if not is_predict:
test_list=test_list_file,
module='dataprovider',
obj='process',
- args={'word_dict': word_dict,
- 'label_dict': label_dict,
- 'predicate_dict': predicate_dict })
+ args={
+ 'word_dict': word_dict,
+ 'label_dict': label_dict,
+ 'predicate_dict': predicate_dict
+ })
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
@@ -77,24 +78,16 @@ mark_dim = 5
hidden_dim = 512
depth = 8
-
-
########################### Optimizer #######################################
-
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
- model_average=ModelAverage(average_window=0.5,
- max_average_window=10000),
-
-)
-
-
-
+ model_average=ModelAverage(
+ average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
@@ -108,22 +101,28 @@ ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
-
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
-
-default_std=1/math.sqrt(hidden_dim)/3.0
+default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
-std_default = ParameterAttribute(initial_std=default_std)
-
-predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
-mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
-
-word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
-emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
+std_default = ParameterAttribute(initial_std=default_std)
+
+predicate_embedding = embedding_layer(
+ size=word_dim,
+ input=predicate,
+ param_attr=ParameterAttribute(
+ name='vemb', initial_std=default_std))
+mark_embedding = embedding_layer(
+ name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
+
+word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+emb_layers = [
+ embedding_layer(
+ size=word_dim, input=x, param_attr=emb_para) for x in word_input
+]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
@@ -131,84 +130,89 @@ hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
- input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
-
+ input=[
+ full_matrix_projection(
+ input=emb, param_attr=std_default) for emb in emb_layers
+ ])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
-hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
-
-lstm_0 = lstmemory(name='lstm0',
- input=hidden_0,
- act=ReluActivation(),
- gate_act=SigmoidActivation(),
- state_act=SigmoidActivation(),
- bias_attr=std_0,
- param_attr=lstm_para_attr)
+hidden_para_attr = ParameterAttribute(
+ initial_std=default_std, learning_rate=mix_hidden_lr)
+
+lstm_0 = lstmemory(
+ name='lstm0',
+ input=hidden_0,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
-
for i in range(1, depth):
- mix_hidden = mixed_layer(name='hidden'+str(i),
- size=hidden_dim,
- bias_attr=std_default,
- input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
- full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
- ]
- )
-
- lstm = lstmemory(name='lstm'+str(i),
- input=mix_hidden,
- act=ReluActivation(),
- gate_act=SigmoidActivation(),
- state_act=SigmoidActivation(),
- reverse=((i % 2)==1),
- bias_attr=std_0,
- param_attr=lstm_para_attr)
+ mix_hidden = mixed_layer(
+ name='hidden' + str(i),
+ size=hidden_dim,
+ bias_attr=std_default,
+ input=[
+ full_matrix_projection(
+ input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(
+ input=input_tmp[1], param_attr=lstm_para_attr)
+ ])
+
+ lstm = lstmemory(
+ name='lstm' + str(i),
+ input=mix_hidden,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ reverse=((i % 2) == 1),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
-feature_out = mixed_layer(name='output',
- size=label_dict_len,
- bias_attr=std_default,
- input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
- full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
- ],
- )
-
-
+feature_out = mixed_layer(
+ name='output',
+ size=label_dict_len,
+ bias_attr=std_default,
+ input=[
+ full_matrix_projection(
+ input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(
+ input=input_tmp[1], param_attr=lstm_para_attr)
+ ], )
if not is_predict:
- crf_l = crf_layer( name = 'crf',
- size = label_dict_len,
- input = feature_out,
- label = target,
- param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
-
- )
-
-
- crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
- size = label_dict_len,
- input = feature_out,
- label = target,
- param_attr=ParameterAttribute(name='crfw')
- )
-
+ crf_l = crf_layer(
+ name='crf',
+ size=label_dict_len,
+ input=feature_out,
+ label=target,
+ param_attr=ParameterAttribute(
+ name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
+
+ crf_dec_l = crf_decoding_layer(
+ name='crf_dec_l',
+ size=label_dict_len,
+ input=feature_out,
+ label=target,
+ param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
-
+
outputs(crf_l)
else:
- crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
- size = label_dict_len,
- input = feature_out,
- param_attr=ParameterAttribute(name='crfw')
- )
+ crf_dec_l = crf_decoding_layer(
+ name='crf_dec_l',
+ size=label_dict_len,
+ input=feature_out,
+ param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)
-
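
For concreteness, the initialization scale chosen in db_lstm.py above works out as follows for hidden_dim = 512 (plain arithmetic, no PaddlePaddle needed):

```
import math

hidden_dim = 512
default_std = 1 / math.sqrt(hidden_dim) / 3.0
print(default_std)  # ~0.0147
```
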
diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py
index 15145fafceb2422ee201684e85ef5d1043a7bf7d..372fd090b6e8f08f5bb34697772c2e4976810595 100644
--- a/demo/semantic_role_labeling/predict.py
+++ b/demo/semantic_role_labeling/predict.py
@@ -26,7 +26,8 @@ UNK_IDX = 0
class Prediction():
- def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
+ def __init__(self, train_conf, dict_file, model_dir, label_file,
+ predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -35,7 +36,7 @@ class Prediction():
self.dict = {}
self.labels = {}
- self.predicate_dict={}
+ self.predicate_dict = {}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file)
@@ -44,25 +45,18 @@ class Prediction():
len_pred = len(self.predicate_dict)
conf = parse_config(
- train_conf,
- 'dict_len=' + str(len_dict) +
- ',label_len=' + str(len_label) +
- ',pred_len=' + str(len_pred) +
- ',is_predict=True')
+ train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
+ str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
- integer_value_sequence(len_dict),
- integer_value_sequence(len_dict),
- integer_value_sequence(len_dict),
- integer_value_sequence(len_dict),
- integer_value_sequence(len_dict),
- integer_value_sequence(len_dict),
- integer_value_sequence(len_pred),
- integer_value_sequence(2)
- ]
+ integer_value_sequence(len_dict), integer_value_sequence(len_dict),
+ integer_value_sequence(len_dict), integer_value_sequence(len_dict),
+ integer_value_sequence(len_dict), integer_value_sequence(len_dict),
+ integer_value_sequence(len_pred), integer_value_sequence(2)
+ ]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
@@ -78,6 +72,7 @@ class Prediction():
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
+
def get_data(self, data_file):
"""
Get input data of paddle format.
@@ -88,9 +83,10 @@ class Prediction():
).split('\t')
words = sentence.split()
sen_len = len(words)
-
+
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
+ predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
+ ] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
@@ -99,7 +95,7 @@ class Prediction():
marks = mark.split()
mark_slot = [int(w) for w in marks]
-
+
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
@@ -123,8 +119,9 @@ class Prediction():
def option_parser():
- usage = ("python predict.py -c config -w model_dir "
- "-d word dictionary -l label_file -i input_file -p pred_dict_file")
+ usage = (
+ "python predict.py -c config -w model_dir "
+ "-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@@ -187,8 +184,9 @@ def main():
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
- predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
- predict.predict(data_file,output_file)
+ predict = Prediction(train_conf, dict_file, model_path, label_file,
+ predict_dict_file)
+ predict.predict(data_file, output_file)
if __name__ == '__main__':
diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py
index 0095c6f7272a2191ea39e042a836f7d6038032aa..8ec490f64691924013200a3d0038d39aa834b038 100755
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@@ -71,9 +71,7 @@ class SentimentPrediction():
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
- word_slot = [
- self.word_dict[w] for w in words if w in self.word_dict
- ]
+ word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
@@ -85,8 +83,8 @@ class SentimentPrediction():
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
- print("predicting label is %s" %
- (self.label[lab[0]]))
+ print("predicting label is %s" % (self.label[lab[0]]))
+
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
@@ -143,9 +141,10 @@ def main():
batch.append([predict.get_index(line)])
if len(batch) == batch_size:
predict.batch_predict(batch)
- batch=[]
+ batch = []
if len(batch) > 0:
predict.batch_predict(batch)
+
if __name__ == '__main__':
main()
diff --git a/doc/api/data_provider/pydataprovider2_en.rst b/doc/api/data_provider/pydataprovider2_en.rst
index 083436e2710b4582e11741aaeaf5932d59869473..50e8b0d32923c4fea37f2296a76cf5b44c8364e7 100644
--- a/doc/api/data_provider/pydataprovider2_en.rst
+++ b/doc/api/data_provider/pydataprovider2_en.rst
@@ -1,4 +1,4 @@
-.. _api_pydataprovider:
+.. _api_pydataprovider2_en:
PyDataProvider2
===============
@@ -104,6 +104,8 @@ And PaddlePadle will do all of the rest things\:
Is this cool?
+.. _api_pydataprovider2_en_sequential_model:
+
DataProvider for the sequential model
-------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several
diff --git a/doc/api/predict/swig_py_paddle_en.rst b/doc/api/predict/swig_py_paddle_en.rst
index 9845cd1607b425dc0a4ddc665aab40d96fa2fbe4..8b145e5b30a88db9f61c63249885dac92dd1fa9c 100644
--- a/doc/api/predict/swig_py_paddle_en.rst
+++ b/doc/api/predict/swig_py_paddle_en.rst
@@ -23,7 +23,7 @@ python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments, for more about command line arguments
- see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
+ see :ref:`cmd_detail_introduction_en` .
* Parse the configuration file that is used in training with :code:`parse_config()`.
Because data to predict with always have no label, and output of prediction work
normally is the output layer rather than the cost layer, so you should modify
@@ -36,7 +36,7 @@ python's :code:`help()` function. Let's walk through the above python script:
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderConverter that can accept the same input data with
PyDataProvider2, for more information please refer to document
- of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
+ of :ref:`api_pydataprovider2_en` .
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.
diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst
index 12a75080d0deab1ecce6b2579b059ba56abf6711..52a6cfb120504d57617f0d777b5ca49cd7d269d7 100644
--- a/doc/api/trainer_config_helpers/layers.rst
+++ b/doc/api/trainer_config_helpers/layers.rst
@@ -1,3 +1,5 @@
+.. _api_trainer_config_helpers_layers:
+
======
Layers
======
diff --git a/doc/getstarted/basic_usage/index_en.rst b/doc/getstarted/basic_usage/index_en.rst
index dca7a6b1f4f017b302148c611122806f112564a9..4ffadc68ee53e12e3b3cb56ea27021c52505aebf 100644
--- a/doc/getstarted/basic_usage/index_en.rst
+++ b/doc/getstarted/basic_usage/index_en.rst
@@ -99,11 +99,3 @@ In PaddlePaddle, training is just to get a collection of model parameters, which
Although starts from a random guess, you can see that value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer.
There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data.
-
-
-5. Where to Go from Here
--------------------------
-
-- `Install and Build <../build_and_install/index.html>`_
-- `Tutorials <../demo/quick_start/index_en.html>`_
-- `Example and Demo <../demo/index.html>`_
diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md
index 3771d316a1b520b9f29b30babd663b4dd27fd650..5db871d59ae83666263d03a6ea3b504d323293ee 100644
--- a/doc/getstarted/build_and_install/build_from_source_en.md
+++ b/doc/getstarted/build_and_install/build_from_source_en.md
@@ -14,6 +14,13 @@ cd paddle
git submodule update --init --recursive
```
+If you already have a local PaddlePaddle repo with an uninitialized submodule, the submodule folder will be empty. Simply run the last command above in your PaddlePaddle home directory to initialize it.
+
+If you have already initialized the submodule and want to sync it with the upstream submodule repo, run the following command:
+```
+git submodule update --remote
+```
+
## Requirements
To compile the source code, your computer must be equipped with the following dependencies.
diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst
index feb027ccbbcdb68766e3462f0b8180e3734ef9c7..8df7e063a1ffba5ed4b4bad409d35671de53a633 100644
--- a/doc/getstarted/build_and_install/docker_install_en.rst
+++ b/doc/getstarted/build_and_install/docker_install_en.rst
@@ -122,9 +122,9 @@ The general development workflow with Docker and Bazel is as follows:
git clone --recursive https://github.com/paddlepaddle/paddle
-2. Build a development Docker image `paddle:dev` from the source code.
- This image contains all the development tools and dependencies of
- PaddlePaddle.
+2. Build a development Docker image :code:`paddle:dev` from the source
+ code. This image contains all the development tools and
+ dependencies of PaddlePaddle.
.. code-block:: bash
@@ -139,14 +139,22 @@ The general development workflow with Docker and Bazel is as follows:
.. code-block:: bash
- docker run \
- -d # run the container in background mode \
- --name paddle # we can run a nginx container to serve documents \
- -p 2022:22 # so we can SSH into this container \
- -v $PWD:/paddle # mount the source code \
- -v $HOME/.cache/bazel:/root/.cache/bazel # mount Bazel cache \
+ docker run \
+ -d \
+ --name paddle \
+ -p 2022:22 \
+ -v $PWD:/paddle \
+ -v $HOME/.cache/bazel:/root/.cache/bazel \
paddle:dev
+   where :code:`-d` runs the container in the background,
+   :code:`--name paddle` names the container so that we can run an
+   nginx container to serve documents in it, :code:`-p 2022:22`
+   lets us SSH into the container, :code:`-v $PWD:/paddle` shares the
+   source code on the host with the container, and :code:`-v
+   $HOME/.cache/bazel:/root/.cache/bazel` shares the Bazel cache on
+   the host with the container.
+
4. SSH into the container:
.. code-block:: bash
diff --git a/doc/howto/cmd_parameter/detail_introduction_en.md b/doc/howto/cmd_parameter/detail_introduction_en.md
index 510396b629e398cef2ccda2f1cec474160693219..82136b7d4f65ffcdff60243feb25b31a4a468637 100644
--- a/doc/howto/cmd_parameter/detail_introduction_en.md
+++ b/doc/howto/cmd_parameter/detail_introduction_en.md
@@ -1,3 +1,7 @@
+```eval_rst
+.. _cmd_detail_introduction_en:
+```
+
# Detail Description
## Common
diff --git a/doc/howto/deep_model/rnn/rnn_en.rst b/doc/howto/deep_model/rnn/rnn_en.rst
index da29b8efadd299fe4fc74a71392cbc9a56e32be3..b4c0c8bb4cf063872abc783932df737642fb9178 100644
--- a/doc/howto/deep_model/rnn/rnn_en.rst
+++ b/doc/howto/deep_model/rnn/rnn_en.rst
@@ -30,7 +30,7 @@ Then at the :code:`process` function, each :code:`yield` function will return th
yield src_ids, trg_ids, trg_ids_next
-For more details description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
+For a more detailed description of how to write a data provider, please refer to :ref:`api_pydataprovider2_en` . The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
===============================================
Configure Recurrent Neural Network Architecture
@@ -106,7 +106,7 @@ We will use the sequence to sequence model with attention as an example to demon
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
-The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details.
+The encoder part of the model is listed below. It calls :code:`grumemory` to represent a gated recurrent neural network. It is the recommended way of using a recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures; you can refer to :ref:`api_trainer_config_helpers_layers` for more details.
We also project the encoder vector to a :code:`decoder_size`-dimensional space, get the first instance of the backward recurrent network, and project it to a :code:`decoder_size`-dimensional space:
@@ -246,6 +246,6 @@ The code is listed below:
outputs(beam_gen)
-Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details.
+Notice that this generation technique is only useful for decoder-like generation processes. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling_en` for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`.
diff --git a/doc/howto/optimization/gpu_profiling_en.rst b/doc/howto/optimization/gpu_profiling_en.rst
index 667bf1364e7cd4c9098caba72a127228d78ca38b..40ba698f4e571dfd9370fcfb9382ea50e814ca2e 100644
--- a/doc/howto/optimization/gpu_profiling_en.rst
+++ b/doc/howto/optimization/gpu_profiling_en.rst
@@ -51,7 +51,7 @@ In this tutorial, we will focus on nvprof and nvvp.
:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
above profilers.
-.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
+.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:linenos:
@@ -77,7 +77,7 @@ As a simple example, consider the following:
1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines).
- .. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 8-10,13
@@ -124,7 +124,7 @@ To use this command line profiler **nvprof**, you can simply issue the following
1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines).
- .. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
+ .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 6-7
diff --git a/doc/tutorials/embedding_model/index_en.md b/doc/tutorials/embedding_model/index_en.md
index 06f3ff1f009e470cdb9687658613a76acbb79751..d793a50f488e464bcd90a2fb506a8dcc3c760433 100644
--- a/doc/tutorials/embedding_model/index_en.md
+++ b/doc/tutorials/embedding_model/index_en.md
@@ -93,7 +93,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the
- `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
- `--load_missing_parameter_strategy`: operations when the model file is missing; here we use a normal distribution to initialize the other parameters except for the embedding layer
-For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md).
+For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/index_en.md).
## Optional Function ##
### Embedding Parameters Observation
diff --git a/doc/tutorials/quick_start/index_en.md b/doc/tutorials/quick_start/index_en.md
index ec548b5393d7b210d6409328c00917aeb679a451..29637293fad79f3c3b9aabe83b71758b471b9338 100644
--- a/doc/tutorials/quick_start/index_en.md
+++ b/doc/tutorials/quick_start/index_en.md
@@ -12,7 +12,7 @@ This tutorial will teach the basics of deep learning (DL), including how to impl
To get started, please install PaddlePaddle on your computer. Throughout this tutorial, you will learn by implementing different DL models for text classification.
-To install PaddlePaddle, please follow the instructions here: Build and Install.
+To install PaddlePaddle, please follow the instructions here: Build and Install.
## Overview
For the first step, you will use PaddlePaddle to build a **text classification** system. For example, suppose you run an e-commerce website, and you want to analyze the sentiment of user reviews to evaluate product quality.
@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process",
args={"dictionary": word_dict})
```
-You can refer to the following link for more detailed examples and data formats: PyDataProvider2.
+You can refer to the following link for more detailed examples and data formats: PyDataProvider2.
## Network Architecture
You will describe four kinds of network architectures in this section.

First, you will build a logistic regression model. Later, you will also get the chance to build other, more powerful network architectures.
-For more detailed documentation, you could refer to: Layer documentation。All configuration files are in `demo/quick_start` directory.
+For more detailed documentation, you could refer to: layer documentation. All configuration files are in the `demo/quick_start` directory.
### Logistic Regression
The architecture is illustrated in the following picture:
@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
## Optimization Algorithm
-Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
+Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use the Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural networks.
```python
settings(batch_size=128,
@@ -391,7 +391,8 @@ paddle train \
--use_gpu=false
```
-If you want to install the remote training platform, which enables distributed training on clusters, follow the instructions here: Platform documentation. We do not provide examples on how to train on clusters. Please refer to other demos or platform training documentation for mode details on training on clusters.
+We do not provide examples of how to train on clusters here. If you want to train on clusters, please refer to the distributed training documentation or other demos for more details.
+
## Inference
You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on a dataset with labels to obtain its test accuracy.

@@ -406,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x
```
-We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to: Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation.
+We will give an example of performing prediction using the recurrent model on a dataset with no labels. You can refer to the Python Prediction API tutorial, or other demos, for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh):
@@ -508,7 +509,7 @@ The scripts of data downloading, network configurations, and training scrips are
* \--config_args: Other configuration arguments.
* \--init_model_path: The path of the initial model parameter.
-By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in command line argument documentation。
+By default, the trainer will save the model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in the command line argument documentation.
### Log
diff --git a/doc/tutorials/rec/ml_regression_en.rst b/doc/tutorials/rec/ml_regression_en.rst
index ddc00dc706535e1204b033b505ee8bd579f8dea3..6346090a84fad71ab9dff21de0dcc536b5760b83 100644
--- a/doc/tutorials/rec/ml_regression_en.rst
+++ b/doc/tutorials/rec/ml_regression_en.rst
@@ -264,7 +264,7 @@ In this :code:`dataprovider.py`, we should set\:
* use_seq\: Whether this :code:`dataprovider.py` is in sequence mode or not.
* process\: Return each sample of data to :code:`paddle`.
-The data provider details document see :ref:`api_pydataprovider`.
+For details of the data provider, see :ref:`api_pydataprovider2_en`.
Train
`````
diff --git a/doc/tutorials/semantic_role_labeling/index_en.md b/doc/tutorials/semantic_role_labeling/index_en.md
index f5bdf64487aa189cefcd55d633cc6638912b9e31..bdd12c0d9abd759d8507a3029f373dc5db6f8f40 100644
--- a/doc/tutorials/semantic_role_labeling/index_en.md
+++ b/doc/tutorials/semantic_role_labeling/index_en.md
@@ -1,3 +1,7 @@
+```eval_rst
+.. _semantic_role_labeling_en:
+```
+
# Semantic Role labeling Tutorial #
Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization, and question answering. An instance is as follows [1]:
diff --git a/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md b/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md
deleted file mode 100644
index f3c855a9fd72b894ab69050b08c750fe9e4aa1a2..0000000000000000000000000000000000000000
--- a/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md
+++ /dev/null
@@ -1,201 +0,0 @@
-# Semantic Role Labeling Tutorial #
-
-Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization, and question answering. An instance is as follows [1]:
-
- [ A0 He ] [ AM-MOD would ][ AM-NEG not ] [ V accept ] [ A1 anything ] from [ A2 those he wrote ] .
-
-- V: verb
-- A0: acceptor
-- A1: thing accepted
-- A2: accepted-from
-- A3: attribute
-- AM-MOD: modal
-- AM-NEG: negation
-
-Given the verb "accept", most constituents in the sentence will play certain semantic roles. Here, the label scheme comes from the Penn Proposition Bank.
-
-To this date, most successful SRL systems are built on top of some form of parsing results, where pre-defined feature templates over the syntactic structure are used. This tutorial presents an end-to-end system using deep bidirectional long short-term memory (DB-LSTM) [2] to solve the SRL task, which largely outperforms the previous state-of-the-art systems. The system regards the SRL task as a sequence labeling problem.
-
-## Data Description
-The related paper [2] takes the data set in the CoNLL-2005&2012 Shared Task for training and testing. According to the data license, the demo adopts the test data set of CoNLL-2005, which can be reached on the website.
-
-Users only need to execute the following commands to download and process the original data:
-
-```bash
-cd data
-./get_data.sh
-```
-Several new files will appear in the `data` directory:
-```bash
-conll05st-release: the test data set of the CoNLL-2005 shared task
-test.wsj.words: the Wall Street Journal data sentences
-test.wsj.props: the propositional arguments
-feature: the extracted features from the data set
-```
-
-## Training
-### DB-LSTM
-Please refer to the Sentiment Analysis demo to learn more about the long short-term memory unit.
-
-Unlike the Bidirectional-LSTM used in the Sentiment Analysis demo, the DB-LSTM adopts another way to stack LSTM layers: first, a standard LSTM processes the sequence in the forward direction; the input and output of this LSTM layer are then taken as input by the next LSTM layer and processed in the reversed direction. These two standard LSTM layers compose a pair of LSTMs, and we stack the LSTM layers pair after pair to obtain the deep LSTM model, as sketched below.
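-
-A minimal sketch of this stacking pattern, in the spirit of this demo's
-config (illustrative only: it assumes the trainer_config_helpers primitives
-`fc_layer` and `lstmemory` with its `reverse` flag, and `depth`, `hidden_dim`
-and `word_emb` are hypothetical names, not taken from this file):
-```
-hidden = fc_layer(input=word_emb, size=hidden_dim)
-lstm = lstmemory(input=hidden, reverse=False)  # layer 1 runs forward
-for i in range(2, depth + 1):
-    hidden = fc_layer(input=[hidden, lstm], size=hidden_dim)
-    # even-numbered layers process the sequence in the reversed direction
-    lstm = lstmemory(input=hidden, reverse=(i % 2 == 0))
-feature = [hidden, lstm]  # fed to the output layer
-```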
-
-The following figure shows a temporally expanded 2-layer DB-LSTM network.
-
-### Features
-Two input features play an essential role in this pipeline: predicate (pred) and argument (arguments). Two other features are also adopted: predicate context (ctx-p) and region mark (mr). A single predicate word cannot exactly describe the predicate information, especially when the same word appears more than once in a sentence; with the predicate context, the ambiguity can be largely eliminated. Similarly, we use region mark mr = 1 to denote that the argument position is in the predicate context region, and mr = 0 if not. These four simple features are all we need for our SRL system. The features of one sample with context size 1 are shown below [2]:
-
-In this example, the corresponding labeled sentence is:
-
- [ A1 A record date ] has [ AM-NEG n't ] been [ V set ] .
-
-In the demo, we adopt the feature template as above, consisting of `argument`, `predicate`, `ctx-p (p=-1,0,1)` and `mark`, and use the `B/I/O` scheme to label each argument. These features and labels are stored in the `feature` file, separated by `\t`.
-
-### Data Provider
-
-`dataprovider.py` is the Python file that wraps the data. The function `hook()` defines the data slots for the network. The eight features and the label are all index slots.
-```
-def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
- settings.word_dict = word_dict
- settings.label_dict = label_dict
- settings.predicate_dict = predicate_dict
- #all inputs are integral and sequential type
- settings.slots = [
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(predicate_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(len(word_dict)),
- integer_value_sequence(2),
- integer_value_sequence(len(label_dict))]
-```
-The corresponding data iterator is as follows:
-```
-@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
- can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
- with open(file_name, 'r') as fdata:
- for line in fdata:
- sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
- line.strip().split('\t')
-
- words = sentence.split()
- sen_len = len(words)
- word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
-
- predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
- ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
- ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
- ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
- ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
- ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
-
- marks = mark.split()
- mark_slot = [int(w) for w in marks]
-
- label_list = label.split()
- label_slot = [settings.label_dict.get(w) for w in label_list]
- yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
-```
-The `process` function yields 9 lists: 8 features and the label.
-
-### Neural Network Config
-
-`db_lstm.py` is the neural network config file which loads the dictionaries and defines the data provider module and network architecture during training.
-
-Nine `data_layer`s load instances from the data provider. Eight features are respectively transformed into embeddings and mixed by a `mixed_layer`. The deep bidirectional LSTM layers extract features for the softmax layer. The objective function is the cross entropy of labels.
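-
-A minimal sketch of that wiring (illustrative only: the sizes `word_dim` and
-`hidden_dim` are hypothetical, and only two of the nine `data_layer`s are
-spelled out):
-```
-word = data_layer(name='word_data', size=len(word_dict))
-predicate = data_layer(name='verb_data', size=len(predicate_dict))
-# ... seven more data_layers: ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-# mark, and the label ...
-word_emb = embedding_layer(input=word, size=word_dim)
-pred_emb = embedding_layer(input=predicate, size=word_dim)
-hidden = mixed_layer(size=hidden_dim,
-                     input=[full_matrix_projection(input=word_emb),
-                            full_matrix_projection(input=pred_emb)])
-```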
-
-### Run Training
-The script for training is `train.sh`; users only need to execute:
-```bash
- ./train.sh
-```
-The content in `train.sh`:
-```
-paddle train \
- --config=./db_lstm.py \
- --use_gpu=0 \
- --log_period=5000 \
- --trainer_count=1 \
- --show_parameter_stats_period=5000 \
- --save_dir=./output \
- --num_passes=10000 \
- --average_test_period=10000000 \
- --init_model_path=./data \
- --load_missing_parameter_strategy=rand \
- --test_all_data_in_one_period=1 \
-2>&1 | tee 'train.log'
-```
-
-- \--config=./db_lstm.py: network config file.
-- \--use_gpu=false: train with CPU (you can set it to true if you have installed the GPU version of PaddlePaddle and want to train with GPU; currently crf_layer does not support GPU).
-- \--log_period=5000: print a log every 5000 batches.
-- \--trainer_count=1: set the number of threads (or GPUs).
-- \--show_parameter_stats_period=5000: show parameter statistics every 5000 batches.
-- \--save_dir=./output: output path of the model.
-- \--num_passes=10000: set the number of passes; one pass means training on all samples in the dataset once.
-- \--average_test_period=10000000: test on the averaged parameters every average_test_period batches.
-- \--init_model_path=./data: path for parameter initialization.
-- \--load_missing_parameter_strategy=rand: initialize missing parameters randomly.
-- \--test_all_data_in_one_period=1: test all data in one period.
-
-
-After training, the models will be saved in the directory `output`. Our training curve is as follows:
-
-### Run Test
-The script for testing is `test.sh`; execute:
-```bash
- ./test.sh
-```
-The main part of `test.sh`:
-```
-paddle train \
- --config=./db_lstm.py \
- --model_list=$model_list \
- --job=test \
- --config_args=is_test=1 \
-```
-
- - \--config=./db_lstm.py: network config file
- - \--model_list=$model_list.list: model list file
- - \--job=test: indicate the test job
- - \--config_args=is_test=1: flag that indicates the test job
- - \--test_all_data_in_one_period=1: test all data in one period
-
-
-### Run Prediction
-The script for prediction is `predict.sh`; users only need to execute:
-```bash
- ./predict.sh
-```
-In `predict.sh`, users should provide the network config file, model path, label file, word dictionary file, and feature file.
-```
-python predict.py \
- -c $config_file \
- -w $best_model_path \
- -l $label_file \
- -p $predicate_dict_file \
- -d $dict_file \
- -i $input_file \
- -o $output_file
-```
-
-`predict.py` is the main executable Python script, which includes functions for loading the model, loading the data, and making predictions. The network model outputs the probability distribution over the labels. In the demo, we take the label with the maximum probability as the result. Users can also implement beam search or Viterbi decoding on top of the probability distribution matrix.
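-
-For instance, picking the most probable label per word from that matrix can
-be sketched with numpy as below (`prob` and `id_to_label` are assumed names,
-not objects taken from the actual `predict.py`):
-```
-import numpy as np
-
-# A toy 3-word, 4-label probability matrix standing in for the network output.
-prob = np.array([[0.7, 0.1, 0.1, 0.1],
-                 [0.2, 0.5, 0.2, 0.1],
-                 [0.1, 0.1, 0.2, 0.6]])
-id_to_label = {0: 'B-A0', 1: 'I-A0', 2: 'B-V', 3: 'O'}  # assumed mapping
-best_ids = np.argmax(prob, axis=1)           # most likely label id per word
-labels = [id_to_label[int(i)] for i in best_ids]  # -> ['B-A0', 'I-A0', 'O']
-```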
-
-After prediction, the results are saved in `predict.res`.
-
-## Reference
-[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles, Computational Linguistics, 31(1), 2005.
-
-[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.
diff --git a/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md b/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md
index d9ed431ec0566cf90f11ebaeec56560ff69e71fe..64f8fd4b4398ee6ca324584f7cd2418601cb4c57 100644
--- a/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md
+++ b/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md
@@ -306,4 +306,4 @@ I1116 09:10:18.019069 50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:
I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
-```
\ No newline at end of file
+```
diff --git a/doc_cn/cluster/k8s/job.yaml b/doc_cn/cluster/k8s/job.yaml
index 1e0ac464b2ec71e98c28f090124690b01b0755ce..488aad0bede4f940b25c7be04259f209c3de9f52 100644
--- a/doc_cn/cluster/k8s/job.yaml
+++ b/doc_cn/cluster/k8s/job.yaml
@@ -40,4 +40,4 @@ spec:
- name: jobpath
mountPath: /home/jobpath
restartPolicy: Never
-
\ No newline at end of file
+
diff --git a/doc_cn/cluster/k8s/start_paddle.py b/doc_cn/cluster/k8s/start_paddle.py
index 6a461614101aa74f3badf67e65c0d6fcb985ee9b..df00d82919faa2acecc79c28e3d773ba3de9672a 100755
--- a/doc_cn/cluster/k8s/start_paddle.py
+++ b/doc_cn/cluster/k8s/start_paddle.py
@@ -19,7 +19,6 @@ import socket
import os
import argparse
-
# configuration for cluster
API = "/api/v1/namespaces/"
JOBSELECTOR = "labelSelector=job-name="
@@ -145,8 +144,8 @@ def startPaddle(idMap={}, train_args_dict=None):
if __name__ == '__main__':
- parser = argparse.ArgumentParser(prog="start_paddle.py",
- description='simple tool for k8s')
+ parser = argparse.ArgumentParser(
+ prog="start_paddle.py", description='simple tool for k8s')
args, train_args_list = parser.parse_known_args()
train_args = refine_unknown_args(train_args_list)
train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
diff --git a/doc_cn/demo/sentiment_analysis/index.rst b/doc_cn/demo/sentiment_analysis/index.rst
index 82400b2459ebcaf89ff5e884edfe721b9ec01d7f..9d7972b219851d117b1ce72d8eb83eea256e2f87 100644
--- a/doc_cn/demo/sentiment_analysis/index.rst
+++ b/doc_cn/demo/sentiment_analysis/index.rst
@@ -1,8 +1,8 @@
-情感分析教程
-===========================
-
-.. toctree::
- :maxdepth: 3
- :glob:
-
+情感分析教程
+===========================
+
+.. toctree::
+ :maxdepth: 3
+ :glob:
+
Training Locally
\ No newline at end of file
diff --git a/doc_theme/static/js/paddle_doc_init.js b/doc_theme/static/js/paddle_doc_init.js
index 5c815a8d3a3dab9bdbce544ff3bb49be40ad8934..153ce30745a0a21097fb385f2d66f12e6c8d5be5 100644
--- a/doc_theme/static/js/paddle_doc_init.js
+++ b/doc_theme/static/js/paddle_doc_init.js
@@ -28,4 +28,4 @@ $(document).ready(function(){
$('.doc-menu-vertical').find('li.current').last().addClass('active');
$('.doc-menu-vertical').perfectScrollbar();
-});
\ No newline at end of file
+});
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index c1b546dbcb4dc6581bbcfe6a821ab15d0e048ea1..297eaa19bb9981c7f07c90763d76494b7910af93 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -15,8 +15,8 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
-#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "Internal.h"
+#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
std::vector GradientMachine::defaultParamTypes = {
PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM};
diff --git a/paddle/api/Internal.h b/paddle/api/Internal.h
index 4a07880d80440526002f31b1fccff4f7c25ea182..d48dd3a04c14f559e3c8ceb67226ddb36272e444 100644
--- a/paddle/api/Internal.h
+++ b/paddle/api/Internal.h
@@ -16,14 +16,13 @@ limitations under the License. */
#include "PaddleAPI.h"
-#include
#include
+#include
template
void staticCastVector(std::vector* dest, const std::vector& src) {
dest->resize(src.size());
- std::transform(src.begin(),
- src.end(),
- dest->begin(),
- [](T1 t) { return static_cast(t); });
+ std::transform(src.begin(), src.end(), dest->begin(), [](T1 t) {
+ return static_cast(t);
+ });
}
diff --git a/paddle/api/Matrix.cpp b/paddle/api/Matrix.cpp
index d4c00e7093d1ed62b37ff2ce05e44fc9bdbc204a..7c375e5cfb91fc5824f823346af6f80c90b36821 100644
--- a/paddle/api/Matrix.cpp
+++ b/paddle/api/Matrix.cpp
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "PaddleAPI.h"
#include "paddle/math/Matrix.h"
-#include "paddle/math/SparseMatrix.h"
-#include "paddle/math/CpuSparseMatrix.h"
-#include
#include
+#include
+#include "PaddleAPI.h"
+#include "paddle/math/CpuSparseMatrix.h"
+#include "paddle/math/SparseMatrix.h"
struct MatrixPrivate {
std::shared_ptr mat;
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index f3c80e3b06ebd824f44ebec49158bd06e25b1a1c..84a66719c33678fc4aeb038bb81a6b7c5d0c93fb 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -16,8 +16,8 @@ limitations under the License. */
#include
#include
-#include
#include
+#include
#include
#include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/TypeDefs.h"
diff --git a/paddle/api/Parameter.cpp b/paddle/api/Parameter.cpp
index 742ad0679cf090b826405db1d2b24de206ed8b32..4eed00a84a695f2c48ff93b33419ae2b3dd03768 100644
--- a/paddle/api/Parameter.cpp
+++ b/paddle/api/Parameter.cpp
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h"
+#include "PaddleAPI.h"
struct ParameterPrivate {
std::shared_ptr sharedPtr;
diff --git a/paddle/api/ParameterOptimizer.cpp b/paddle/api/ParameterOptimizer.cpp
index 606dccd5ac4a4e12a7fe414627e53540f594184a..21b851dd5e26c4752888067b20d0b1e16a4ab52d 100644
--- a/paddle/api/ParameterOptimizer.cpp
+++ b/paddle/api/ParameterOptimizer.cpp
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "PaddleAPI.h"
-#include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h"
-#include "Internal.h"
#include
+#include "Internal.h"
+#include "PaddleAPI.h"
+#include "PaddleAPIPrivate.h"
struct ParameterOptimizerPrivate {
std::unique_ptr optimizer;
@@ -36,16 +36,13 @@ struct ParameterTraverseCallbackPrivate {
size_t sparseId) {
std::vector real_vecs;
real_vecs.resize(vecs.size());
- std::transform(vecs.begin(),
- vecs.end(),
- real_vecs.begin(),
- [](Vector* v) {
- if (v) {
- return *(paddle::VectorPtr*)(v->getSharedPtr());
- } else {
- return paddle::VectorPtr();
- }
- });
+ std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
+ if (v) {
+ return *(paddle::VectorPtr*)(v->getSharedPtr());
+ } else {
+ return paddle::VectorPtr();
+ }
+ });
paddle::ParameterConfig& real_conf =
*(paddle::ParameterConfig*)(const_cast(conf)
diff --git a/paddle/api/SequenceGenerator.cpp b/paddle/api/SequenceGenerator.cpp
index 5c65b34f2393dd0d41fcf5293f5a4ed8a402beb6..8428edc60df6219fd1d3aebf74b0911a79d370cb 100644
--- a/paddle/api/SequenceGenerator.cpp
+++ b/paddle/api/SequenceGenerator.cpp
@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include
+#include
+#include
+#include
#include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h"
#include "paddle/utils/Flags.h"
-#include
-#include
-#include
-#include
// used to represent partial sequence
struct Path {
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index 9aeb874bdcee8101d255b8d0fbc80b82647f80f1..59b47d4b1c7b6d586e89624c155d7ba6f3885eb6 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -16,12 +16,12 @@ limitations under the License. */
#include "PaddleAPIPrivate.h"
#include
-#include
#include
+#include
+#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/trainer/ParamUtil.h"
#include "paddle/trainer/Trainer.h"
-#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/trainer/TrainerInternal.h"
#include "paddle/utils/Flags.h"
diff --git a/paddle/api/Util.cpp b/paddle/api/Util.cpp
index 0c9c048099771653c56d922ef106b23881e965f3..c3f739568f50b6ee8b0894d06a4d7f91c7816879 100644
--- a/paddle/api/Util.cpp
+++ b/paddle/api/Util.cpp
@@ -14,16 +14,16 @@ limitations under the License. */
#include "PaddleAPI.h"
-#include "paddle/utils/Util.h"
-#include "paddle/utils/PythonUtil.h"
-#include "paddle/utils/Flags.h"
-#include "paddle/utils/Excepts.h"
#include "paddle/parameter/Parameter.h"
+#include "paddle/utils/Excepts.h"
+#include "paddle/utils/Flags.h"
+#include "paddle/utils/PythonUtil.h"
+#include "paddle/utils/Util.h"
#include
+#include
#include
#include
-#include
void initPaddle(int argc, char** argv) {
paddle::initMain(argc, argv);
diff --git a/paddle/api/Vector.cpp b/paddle/api/Vector.cpp
index 4f3ab7de60d28415368500597ced7a11afbfa30c..874f2fd044e9e86b44f8ca69f08bdfd3287d4749 100644
--- a/paddle/api/Vector.cpp
+++ b/paddle/api/Vector.cpp
@@ -282,7 +282,7 @@ FloatArray Vector::getData() const {
}
void Vector::copyFrom(Vector* src) throw(RangeError) {
- if (src->m->vec->getSize() != m->vec->getSize()) {
+ if (src->m->vec->getSize() != m->vec->getSize()) {
throw RangeError();
}
m->vec->copyFrom(*src->m->vec);
diff --git a/paddle/api/test/testMatrix.py b/paddle/api/test/testMatrix.py
index f76f84d2e12af7802532b014d3983fe017fbe2b1..37666bdccc9aedfe8f8079124129aad2ade53a43 100644
--- a/paddle/api/test/testMatrix.py
+++ b/paddle/api/test/testMatrix.py
@@ -100,11 +100,12 @@ class TestMatrix(unittest.TestCase):
for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]):
self.assertAlmostEqual(a, e)
-
+
def test_numpy(self):
numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32")
m = swig_paddle.Matrix.createDenseFromNumpy(numpy_mat)
- self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape)
+ self.assertEqual((int(m.getHeight()), int(m.getWidth())),
+ numpy_mat.shape)
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
for a, e in zip(m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
self.assertAlmostEqual(a, e)
diff --git a/paddle/api/test/testVector.py b/paddle/api/test/testVector.py
index 525ed97eddbc51188f8c4a6d5c5c1c13ce08bac2..1ab095c1d3d0d2c84d2d2f95a03f172b901de209 100644
--- a/paddle/api/test/testVector.py
+++ b/paddle/api/test/testVector.py
@@ -26,17 +26,17 @@ class TestIVector(unittest.TestCase):
self.assertEqual(m[i], 0)
m[i] = i
self.assertEqual(m[i], i)
-
+
m = swig_paddle.IVector.createZero(10)
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
- self.assertEqual(m.getData(), [0]*10)
+ self.assertEqual(m.getData(), [0] * 10)
def test_create(self):
m = swig_paddle.IVector.create(range(10), False)
self.assertIsNotNone(m)
for i in xrange(10):
self.assertEqual(m[i], i)
-
+
m = swig_paddle.IVector.create(range(10))
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(m.getData(), range(10))
@@ -69,7 +69,7 @@ class TestIVector(unittest.TestCase):
expect_vec = range(0, 10)
expect_vec[4] = 7
self.assertEqual(vec.getData(), expect_vec)
-
+
def test_numpy(self):
vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32")
iv = swig_paddle.IVector.createVectorFromNumpy(vec)
@@ -85,10 +85,10 @@ class TestVector(unittest.TestCase):
self.assertTrue(util.doubleEqual(v[i], 0))
v[i] = i
self.assertTrue(util.doubleEqual(v[i], i))
-
+
v = swig_paddle.Vector.createZero(10)
self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
- self.assertEqual(v.getData(), [0]*10)
+ self.assertEqual(v.getData(), [0] * 10)
def testCreate(self):
v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)], False)
@@ -96,14 +96,13 @@ class TestVector(unittest.TestCase):
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(v[i], i / 100.0))
self.assertEqual(100, len(v))
-
+
v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)])
self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(100, len(v))
vdata = v.getData()
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(vdata[i], i / 100.0))
-
def testCpuNumpy(self):
numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
@@ -128,7 +127,7 @@ class TestVector(unittest.TestCase):
for i in xrange(1, len(numpy_3)):
util.doubleEqual(numpy_3[i], vec[i])
-
+
def testNumpy(self):
numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
vec = swig_paddle.Vector.createVectorFromNumpy(numpy_arr)
@@ -136,7 +135,6 @@ class TestVector(unittest.TestCase):
vecData = vec.getData()
for n, v in zip(numpy_arr, vecData):
self.assertTrue(util.doubleEqual(n, v))
-
def testCopyFromNumpy(self):
vec = swig_paddle.Vector.createZero(1, False)
diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h
index 0b9dfc6117685b48102a0681b38f25493259d624..84c5f2d5c91feb7896643d2c5f60a279ebe944e7 100644
--- a/paddle/cuda/include/hl_base.h
+++ b/paddle/cuda/include/hl_base.h
@@ -223,9 +223,9 @@ typedef struct {
#ifdef __NVCC__
-#include "paddle/utils/Logging.h"
-#include "hl_cuda.h"
#include "cuda_runtime.h"
+#include "hl_cuda.h"
+#include "paddle/utils/Logging.h"
extern __thread bool g_sync_flag;
extern __thread cudaStream_t default_stream;
diff --git a/paddle/cuda/include/hl_dso_loader.h b/paddle/cuda/include/hl_dso_loader.h
index 9ddf0e61ee5ecb49e02ac7f6f35e4961cb2119f1..20c13f21e61a92b0635b686f6f724ae2b44518cc 100644
--- a/paddle/cuda/include/hl_dso_loader.h
+++ b/paddle/cuda/include/hl_dso_loader.h
@@ -16,8 +16,8 @@ limitations under the License. */
#define HL_DSO_LOADER_H_
#include
-#include
#include
+#include
#include "hl_base.h"
/**
diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h
index aad0450c8c9b0ce7ed647962fdf94985c2f4a6fc..ede2670882ee2b93f610a2261a4ecc1784bc2d0c 100644
--- a/paddle/cuda/include/hl_gpu.h
+++ b/paddle/cuda/include/hl_gpu.h
@@ -15,28 +15,28 @@ limitations under the License. */
#ifndef HL_GPU_H_
#define HL_GPU_H_
+#include "hl_aggregate.h"
#include "hl_base.h"
+#include "hl_cnn.h"
#include "hl_cuda.h"
#include "hl_cuda_cublas.h"
#include "hl_cuda_cudnn.h"
-#include "hl_matrix.h"
-#include "hl_aggregate.h"
-#include "hl_cnn.h"
-#include "hl_sparse.h"
#include "hl_lstm.h"
+#include "hl_matrix.h"
#include "hl_sequence.h"
+#include "hl_sparse.h"
#include "hl_warpctc_wrap.h"
#ifdef HPPL_STUB_FUNC
-#include "stub/hl_cuda_stub.h"
-#include "stub/hl_cuda_cublas_stub.h"
-#include "stub/hl_cuda_cudnn_stub.h"
-#include "stub/hl_matrix_stub.h"
#include "stub/hl_aggregate_stub.h"
#include "stub/hl_cnn_stub.h"
-#include "stub/hl_sparse_stub.h"
+#include "stub/hl_cuda_cublas_stub.h"
+#include "stub/hl_cuda_cudnn_stub.h"
+#include "stub/hl_cuda_stub.h"
#include "stub/hl_lstm_stub.h"
+#include "stub/hl_matrix_stub.h"
#include "stub/hl_sequence_stub.h"
+#include "stub/hl_sparse_stub.h"
#endif
#endif /* HL_GPU_H_ */
diff --git a/paddle/cuda/include/hl_time.h b/paddle/cuda/include/hl_time.h
index f214b055f98de8eae76554bb4ec1deb868903750..f63f02582060156562061f73c429fc7bbd878d2c 100644
--- a/paddle/cuda/include/hl_time.h
+++ b/paddle/cuda/include/hl_time.h
@@ -14,7 +14,7 @@ limitations under the License. */
#ifndef HL_TIME_H_
#define HL_TIME_H_
-
+#include
/**
* @brief High resolution timer.
*
diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc
index 7cede8c63c8a6503b3cdb73f9cb6d01cba23af7a..182e8ab218cce18448f8a08f5c1a1dab7e38f2b6 100644
--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include "hl_cuda_cublas.h"
#include
#include
#include "hl_cuda.h"
-#include "hl_cuda_cublas.h"
-#include "hl_thread.ph"
#include "hl_dso_loader.h"
+#include "hl_thread.ph"
#include "paddle/utils/Logging.h"
namespace dynload {
diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc
index 9c9b8906c2b3137be6fbbe79a2cbc126f9b8e6f7..7111224d599f0d67395254a95d7f63110a6a87c4 100644
--- a/paddle/cuda/src/hl_cuda_cudnn.cc
+++ b/paddle/cuda/src/hl_cuda_cudnn.cc
@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include "hl_cuda_cudnn.h"
#include
#include
-#include "hl_cuda_cudnn.h"
#include "hl_cuda_cudnn.ph"
-#include "hl_thread.ph"
#include "hl_dso_loader.h"
-#include "paddle/utils/Logging.h"
+#include "hl_thread.ph"
#include "paddle/utils/CommandLineParser.h"
+#include "paddle/utils/Logging.h"
P_DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
4096,
diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc
index d1814482929768ea6626459ca51af5ad527e7b43..b0bba73594d0f7d4aba02745d78da68f0baa3f8a 100644
--- a/paddle/cuda/src/hl_cuda_device.cc
+++ b/paddle/cuda/src/hl_cuda_device.cc
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include "hl_cuda.h"
#include
#include
#include
#include
#include
#include
-#include "hl_cuda.h"
#include "hl_cuda.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
diff --git a/paddle/cuda/src/hl_cudart_wrap.cc b/paddle/cuda/src/hl_cudart_wrap.cc
index a3ac750b530eb10f3889a3ab3cdef7330037acc1..ecc03a729dde2f2b4f8f004234a47d9272997a50 100644
--- a/paddle/cuda/src/hl_cudart_wrap.cc
+++ b/paddle/cuda/src/hl_cudart_wrap.cc
@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef PADDLE_USE_DSO
-#include
#include
+#include
#include "hl_dso_loader.h"
/**
diff --git a/paddle/cuda/src/hl_time.cc b/paddle/cuda/src/hl_time.cc
index 300506589967bb257b6d2ea1ca39a6dfd592d98d..7e5d7e8aaecbcdc61c1e5b5006a2958d4dc84460 100644
--- a/paddle/cuda/src/hl_time.cc
+++ b/paddle/cuda/src/hl_time.cc
@@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include
+#include "hl_time.h"
#include
+#include
+#include
#include
-#include "hl_time.h"
using std::chrono::high_resolution_clock;
diff --git a/paddle/cuda/src/hl_warpctc_wrap.cc b/paddle/cuda/src/hl_warpctc_wrap.cc
index 619b90120f6c86f966154a9e6902db8469500629..9ae8bc0f220e143a5c59d8c3ead012a20369e7b9 100644
--- a/paddle/cuda/src/hl_warpctc_wrap.cc
+++ b/paddle/cuda/src/hl_warpctc_wrap.cc
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include
#include "hl_warpctc_wrap.h"
+#include
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h"
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index f1d09c568db875d847564380179a8ccc6d0d3049..f8c4bcac2f8eb41400659dc24ba81768e7ae3640 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -15,13 +15,13 @@ limitations under the License. */
#include "ActivationFunction.h"
#include
-#include
#include
-#include
+#include
#include
#include
-#include "paddle/utils/ClassRegistrar.h"
+#include
#include "paddle/parameter/Argument.h"
+#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Logging.h"
diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/gserver/dataproviders/DataProvider.cpp
index 55ca62543aa33cf40d1f69d0fa1d6348ccdf1251..0478256f9cd81f4a99eb0cbcbd1a5a21de5cf14b 100644
--- a/paddle/gserver/dataproviders/DataProvider.cpp
+++ b/paddle/gserver/dataproviders/DataProvider.cpp
@@ -14,12 +14,12 @@ limitations under the License. */
#include "DataProvider.h"
-#include "paddle/utils/Util.h"
-#include "paddle/utils/StringUtil.h"
-#include "paddle/utils/Logging.h"
-#include
#include
+#include
#include "ProtoDataProvider.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/StringUtil.h"
+#include "paddle/utils/Util.h"
namespace paddle {
diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h
index 5b854936c6c34926b789436efe58f193aff5cb9d..9b7f7e36cedaa230ae0694d87cc033bd6fa6e652 100644
--- a/paddle/gserver/dataproviders/DataProvider.h
+++ b/paddle/gserver/dataproviders/DataProvider.h
@@ -14,28 +14,28 @@ limitations under the License. */
#pragma once
-#include
-#include
-#include
-#include
-#include
#include
-#include
-#include
#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "DataConfig.pb.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/math/Vector.h"
+#include "paddle/parameter/Argument.h"
+#include "paddle/utils/ClassRegistrar.h"
+#include "paddle/utils/Locks.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Queue.h"
-#include "paddle/utils/Locks.h"
#include "paddle/utils/ThreadLocal.h"
#include "paddle/utils/TypeDefs.h"
-#include "paddle/math/Matrix.h"
-#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Util.h"
-#include "paddle/math/Vector.h"
-#include "DataConfig.pb.h"
-#include "paddle/utils/ClassRegistrar.h"
-#include "paddle/parameter/Argument.h"
namespace paddle {
/**
diff --git a/paddle/gserver/dataproviders/MultiDataProvider.cpp b/paddle/gserver/dataproviders/MultiDataProvider.cpp
index e1fc4c93656bdeafc8d96d7a822104787e084cdf..46fe053768e480c5f69f597c49f363cb966a4168 100644
--- a/paddle/gserver/dataproviders/MultiDataProvider.cpp
+++ b/paddle/gserver/dataproviders/MultiDataProvider.cpp
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/utils/Util.h"
#include "MultiDataProvider.h"
-#include "paddle/utils/Logging.h"
#include
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Util.h"
namespace paddle {
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.cpp b/paddle/gserver/dataproviders/ProtoDataProvider.cpp
index 6a0cb5ef63bc7bf4232ed56ebca775790b89cd31..d16ecca2d977478e7e7f8819f3b5a5ea48e69b07 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.cpp
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.cpp
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "ProtoDataProvider.h"
-#include "paddle/utils/Util.h"
-#include "paddle/utils/StringUtil.h"
#include
#include
#include
+#include "paddle/utils/StringUtil.h"
+#include "paddle/utils/Util.h"
-#include "paddle/utils/Logging.h"
#include "DataProviderGroup.h"
+#include "paddle/utils/Logging.h"
P_DEFINE_double(memory_threshold_on_load_data,
1.0,
@@ -562,16 +562,16 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(dataPos.data(),
- slots_[slot].indices.data(),
- slots_[slot].sparseNonValueData.data(),
- HPPL_STREAM_1);
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseNonValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(dataPos.data(),
- slots_[slot].indices.data(),
- slots_[slot].sparseNonValueData.data());
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
@@ -598,16 +598,16 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(dataPos.data(),
- slots_[slot].indices.data(),
- slots_[slot].sparseFloatValueData.data(),
- HPPL_STREAM_1);
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseFloatValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(dataPos.data(),
- slots_[slot].indices.data(),
- slots_[slot].sparseFloatValueData.data());
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ dataPos.data(),
+ slots_[slot].indices.data(),
+ slots_[slot].sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
diff --git a/paddle/gserver/dataproviders/ProtoDataProvider.h b/paddle/gserver/dataproviders/ProtoDataProvider.h
index 9ec5cb97c02d80b40371409c00e2487dceb3757c..7dd45e062248f20d24c633dd4e1c8b7eebcbfa1b 100644
--- a/paddle/gserver/dataproviders/ProtoDataProvider.h
+++ b/paddle/gserver/dataproviders/ProtoDataProvider.h
@@ -16,8 +16,8 @@ limitations under the License. */
#include
-#include "paddle/utils/Stat.h"
#include "DataFormat.pb.h"
+#include "paddle/utils/Stat.h"
#include "DataProvider.h"
#include "ProtoReader.h"
diff --git a/paddle/gserver/dataproviders/ProtoReader.h b/paddle/gserver/dataproviders/ProtoReader.h
index 6708e7cde7b5db5e739cc4bbf9bc04a124fe9703..4e6f58a5292bec276994fde0764278d12d7ae9d5 100644
--- a/paddle/gserver/dataproviders/ProtoReader.h
+++ b/paddle/gserver/dataproviders/ProtoReader.h
@@ -16,10 +16,10 @@ limitations under the License. */
#include
-#include
#include
-#include
#include
+#include
+#include
namespace paddle {
diff --git a/paddle/gserver/dataproviders/PyDataProvider.cpp b/paddle/gserver/dataproviders/PyDataProvider.cpp
index f5dcbfcf3464a027a3a8f2a67e66037a4495848c..5bdd55309c8bf8d5dcf84f5dcef2c5c85249a668 100644
--- a/paddle/gserver/dataproviders/PyDataProvider.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider.cpp
@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "PyDataProvider.h"
-#include "paddle/utils/PythonUtil.h"
#include
-#include "paddle/utils/Util.h"
#include "paddle/utils/Excepts.h"
+#include "paddle/utils/PythonUtil.h"
+#include "paddle/utils/Util.h"
namespace paddle {
@@ -316,16 +316,16 @@ void PyDataProvider::handleSparseNonValueSlot(
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, NO_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(),
- slot.indices.data(),
- slot.sparseNonValueData.data(),
- HPPL_STREAM_1);
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseNonValueData.data(),
+ HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(),
- slot.indices.data(),
- slot.sparseNonValueData.data());
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
@@ -347,16 +347,16 @@ void PyDataProvider::handleSparseValueSlot(
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, FLOAT_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(),
- slot.indices.data(),
- slot.sparseFloatValueData.data(),
- HPPL_STREAM_DEFAULT);
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseFloatValueData.data(),
+ HPPL_STREAM_DEFAULT);
} else if (std::dynamic_pointer_cast(mat)) {
- std::dynamic_pointer_cast(mat)
- ->copyFrom(slot.sampleSequenceIdVec.data(),
- slot.indices.data(),
- slot.sparseFloatValueData.data());
+ std::dynamic_pointer_cast(mat)->copyFrom(
+ slot.sampleSequenceIdVec.data(),
+ slot.indices.data(),
+ slot.sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp
index 8b04a03f6d26df5eee44fe112bea7bb53f7ef5a7..460efc5adc6f017e91dc9daff6ab32312e4460c1 100644
--- a/paddle/gserver/dataproviders/PyDataProvider2.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp
@@ -15,18 +15,18 @@ limitations under the License. */
#ifndef PADDLE_NO_PYTHON
#include
+#include
#include
#include
-#include
#include
-#include
+#include
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include
#include "DataProvider.h"
-#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Locks.h"
+#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Stat.h"
namespace paddle {
@@ -400,10 +400,9 @@ private:
if (this->loadThread_) { // wait poolActualSize < poolSize;
std::unique_lock l(mtx_);
- pushCV_.wait(l,
- [this, additionalBatchSize] {
- return this->poolActualSize_ < poolSize_;
- });
+ pushCV_.wait(l, [this, additionalBatchSize] {
+ return this->poolActualSize_ < poolSize_;
+ });
}
{
@@ -529,12 +528,10 @@ public:
// but, loading from cache, cache object should ensure
// data pool ready.
std::unique_lock l(mtx_);
- pullCV_.wait(l,
- [this, &size] {
- return this->poolActualSize_ >=
- std::max(size, this->minPoolSize_) ||
- callingContexts_.empty();
- });
+ pullCV_.wait(l, [this, &size] {
+ return this->poolActualSize_ >= std::max(size, this->minPoolSize_) ||
+ callingContexts_.empty();
+ });
if (unittest::OnPoolFilled) {
(*unittest::OnPoolFilled)(this->poolActualSize_);
diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp
index aa6dc7cb86cbbda6bac8823614901a0c2d175278..7556d21e01e0314d3ee17fa37642081174ec41f3 100644
--- a/paddle/gserver/evaluators/Evaluator.cpp
+++ b/paddle/gserver/evaluators/Evaluator.cpp
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/utils/Stat.h"
#include "paddle/gserver/evaluators/Evaluator.h"
+#include "paddle/utils/Stat.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
@@ -842,9 +842,9 @@ void PnpairEvaluator::calc(std::vector& predictArray) {
auto start = predictArray.begin();
while (start != predictArray.end()) {
auto end = std::find_if(
- start + 1,
- predictArray.end(),
- [=](const PredictionResult& x) { return x.queryid != start->queryid; });
+ start + 1, predictArray.end(), [=](const PredictionResult& x) {
+ return x.queryid != start->queryid;
+ });
CHECK(end != start);
stat(start - predictArray.begin(),
end - predictArray.begin(),
diff --git a/paddle/gserver/evaluators/Evaluator.h b/paddle/gserver/evaluators/Evaluator.h
index a26c650c388d826d635fb1b98ac4da28a8bbb148..5770847309670ef1856cfb9255fa847c24513b56 100644
--- a/paddle/gserver/evaluators/Evaluator.h
+++ b/paddle/gserver/evaluators/Evaluator.h
@@ -14,11 +14,11 @@ limitations under the License. */
#pragma once
-#include "paddle/pserver/ParameterClient2.h"
-#include "paddle/utils/ClassRegistrar.h"
+#include
#include "ModelConfig.pb.h"
#include "paddle/parameter/Argument.h"
-#include
+#include "paddle/pserver/ParameterClient2.h"
+#include "paddle/utils/ClassRegistrar.h"
namespace paddle {
diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp
index 6adee05dbee1fa9db9ea98fb27fb5e8a4e8ef328..36ca05b919b136c162105cf4f1fb7705ae7ca7f3 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/GradientMachine.cpp
@@ -14,16 +14,16 @@ limitations under the License. */
#include "GradientMachine.h"
-#include "paddle/utils/Logging.h"
#include
+#include "paddle/utils/Logging.h"
-#include "hl_gpu.h"
-#include "NeuralNetwork.h"
-#include "ParallelNeuralNetwork.h"
+#include "GradientMachineMode.h"
#include "MultiGradientMachine.h"
-#include "NeuralNetwork.h"
#include "MultiNetwork.h"
-#include "GradientMachineMode.h"
+#include "NeuralNetwork.h"
+#include "NeuralNetwork.h"
+#include "ParallelNeuralNetwork.h"
+#include "hl_gpu.h"
namespace paddle {
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index f3e44a9e3962c9d54cd1f9e2710c84f3f476e7ca..579eca71d4cdd2545a3a8be1c7f1dacfdd5ef66b 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -17,15 +17,15 @@ limitations under the License. */
#include
#include
-#include "paddle/math/Matrix.h"
-#include "paddle/parameter/Parameter.h"
-#include "paddle/parameter/ParameterUpdaterBase.h"
-#include "paddle/utils/Thread.h"
-#include "TrainerConfig.pb.h"
#include "ModelConfig.pb.h"
+#include "TrainerConfig.pb.h"
#include "paddle/gserver/dataproviders/DataProvider.h"
#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/gserver/layers/Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/parameter/Parameter.h"
+#include "paddle/parameter/ParameterUpdaterBase.h"
+#include "paddle/utils/Thread.h"
namespace paddle {
/**
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index fe6d96e8ea3eff56f27da412d3a538730ccebbf1..5f9855c4be869aa73aaebfc2e75ee51f050f2722 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -18,9 +18,9 @@ limitations under the License. */
#include "GradientMachine.h"
-#include "paddle/utils/Queue.h"
-#include "paddle/utils/Locks.h"
#include "hl_gpu.h"
+#include "paddle/utils/Locks.h"
+#include "paddle/utils/Queue.h"
namespace paddle {
diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/gserver/gradientmachines/MultiNetwork.cpp
index 61af82fcb7e85a24f9b1311ca0b8168470c5ad8a..6eb3d8db962161ed4123b4ef4a4bb42147bfdf19 100644
--- a/paddle/gserver/gradientmachines/MultiNetwork.cpp
+++ b/paddle/gserver/gradientmachines/MultiNetwork.cpp
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
+#include
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
-#include
#include "MultiNetwork.h"
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index dbcb97b42baa796dbd7017834867454f769cd3f2..ee36a87b9d848edcc37f89221141de3f939e1110 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -14,15 +14,15 @@ limitations under the License. */
#include "paddle/utils/Util.h"
-#include "paddle/utils/Logging.h"
#include "paddle/utils/CustomStackTrace.h"
+#include "paddle/utils/Logging.h"
-#include "paddle/utils/Stat.h"
-#include "hl_gpu.h"
+#include "MultiNetwork.h"
#include "NeuralNetwork.h"
#include "RecurrentGradientMachine.h"
-#include "MultiNetwork.h"
+#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
+#include "paddle/utils/Stat.h"
namespace paddle {
void parameterInitNN(int paramId,
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index fd885b436a710d7910586f48a26faebded3a6fd1..384ca88f47ffb20ca7d16a276a190b063158d273 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -14,18 +14,18 @@ limitations under the License. */
#pragma once
-#include
-#include