diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py
index b766118eb00737c7a196ed85850b3cebd690b0d0..b235010e4ece377beffaaa1b9247a77d7a96b712 100644
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@@ -16,7 +16,6 @@ import numpy as np
 import sys
 import os
 import PIL.Image as Image
-
 """
 Usage: python process_cifar input_dir output_dir
 """
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
     if not os.path.exists(path):
         os.mkdir(path)
 
+
 def create_dir_structure(output_dir):
     """
     Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
     mkdir_not_exist(os.path.join(output_dir, "train"))
     mkdir_not_exist(os.path.join(output_dir, "test"))
 
-def convert_batch(batch_path, label_set, label_map,
-                  output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
     """
     Convert CIFAR batch to the structure of Paddle format.
     batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
     output_dir = sys.argv[2]
     num_batch = 5
     create_dir_structure(output_dir)
-    label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
-                 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+    label_map = {
+        0: "airplane",
+        1: "automobile",
+        2: "bird",
+        3: "cat",
+        4: "deer",
+        5: "dog",
+        6: "frog",
+        7: "horse",
+        8: "ship",
+        9: "truck"
+    }
     labels = {}
     for i in range(1, num_batch + 1):
-        convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
-                      label_map, output_dir, "train")
-    convert_batch(os.path.join(input_dir, "test_batch"), {},
-                  label_map, output_dir, "test")
\ No newline at end of file
+        convert_batch(
+            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+            output_dir, "train")
+    convert_batch(
+        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+        "test")
diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py
index 305efbcdc6bb11f1dac65cc3af82fb997db97f27..28bf1bb02c1f08b2e8ec9acd38f0a8594b05ab66 100644
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@@ -46,14 +46,14 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
 
     settings.img_mean = image_util.load_meta(settings.meta_path,
                                              settings.mean_img_size,
-                                             settings.img_size,
-                                             settings.color)
+                                             settings.img_size, settings.color)
 
     settings.logger.info('Image size: %s', settings.img_size)
     settings.logger.info('Meta path: %s', settings.meta_path)
     settings.input_types = [
         dense_vector(settings.img_raw_size),  # image feature
-        integer_value(settings.num_classes)]  # labels
+        integer_value(settings.num_classes)
+    ]  # labels
 
     settings.logger.info('DataProvider Initialization finished')
 
@@ -79,8 +79,8 @@ def processData(settings, file_list):
                     img = image_util.decode_jpeg(data['images'][i])
                 else:
                     img = data['images'][i]
-                img_feat = image_util.preprocess_img(img, settings.img_mean,
-                                                     settings.img_size, settings.is_train,
-                                                     settings.color)
+                img_feat = image_util.preprocess_img(
+                    img, settings.img_mean, settings.img_size,
+                    settings.is_train, settings.color)
                 label = data['labels'][i]
                 yield img_feat.astype('float32'), int(label)
diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py
index c545d16aafbc741bce25f9469e7f67de5b88fa8c..b5c6431c06f77cef5c31ca844a8427eebaea2fce 100644
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@@ -16,17 +16,20 @@ import numpy as np
 from PIL import Image
 from cStringIO import StringIO
 
+
 def resize_image(img, target_size):
     """
     Resize an image so that the shorter edge has length target_size.
     img: the input image to be resized.
     target_size: the target resized image size.
     """
-    percent = (target_size/float(min(img.size[0], img.size[1])))
-    resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+    percent = (target_size / float(min(img.size[0], img.size[1])))
+    resized_size = int(round(img.size[0] * percent)), int(
+        round(img.size[1] * percent))
     img = img.resize(resized_size, Image.ANTIALIAS)
     return img
 
+
 def flip(im):
     """
     Return the flipped image.
@@ -38,6 +41,7 @@ def flip(im):
     else:
         return im[:, ::-1]
 
+
 def crop_img(im, inner_size, color=True, test=True):
     """
     Return cropped image.
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
         If True, crop the center of images.
     """
     if color:
-        height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+        height, width = max(inner_size, im.shape[1]), max(inner_size,
+                                                          im.shape[2])
         padded_im = np.zeros((3, height, width))
         startY = (height - im.shape[1]) / 2
         startX = (width - im.shape[2]) / 2
         endY, endX = startY + im.shape[1], startX + im.shape[2]
-        padded_im[:, startY: endY, startX: endX] = im
+        padded_im[:, startY:endY, startX:endX] = im
     else:
         im = im.astype('float32')
-        height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+        height, width = max(inner_size, im.shape[0]), max(inner_size,
+                                                          im.shape[1])
         padded_im = np.zeros((height, width))
         startY = (height - im.shape[0]) / 2
         startX = (width - im.shape[1]) / 2
         endY, endX = startY + im.shape[0], startX + im.shape[1]
-        padded_im[startY: endY, startX: endX] = im
+        padded_im[startY:endY, startX:endX] = im
     if test:
         startY = (height - inner_size) / 2
         startX = (width - inner_size) / 2
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
         startX = np.random.randint(0, width - inner_size + 1)
     endY, endX = startY + inner_size, startX + inner_size
     if color:
-        pic = padded_im[:, startY: endY, startX: endX]
+        pic = padded_im[:, startY:endY, startX:endX]
     else:
-        pic = padded_im[startY: endY, startX: endX]
+        pic = padded_im[startY:endY, startX:endX]
     if (not test) and (np.random.randint(2) == 0):
         pic = flip(pic)
     return pic
 
+
 def decode_jpeg(jpeg_string):
     np_array = np.array(Image.open(StringIO(jpeg_string)))
     if len(np_array.shape) == 3:
         np_array = np.transpose(np_array, (2, 0, 1))
     return np_array
 
+
 def preprocess_img(im, img_mean, crop_size, is_train, color=True):
     """
     Does data augmentation for images.
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
     pic -= img_mean
     return pic.flatten()
 
+
 def load_meta(meta_path, mean_img_size, crop_size, color=True):
     """
     Return the loaded meta file.
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
     mean = np.load(meta_path)['data_mean']
     border = (mean_img_size - crop_size) / 2
     if color:
-        assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+        assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
         mean = mean.reshape(3, mean_img_size, mean_img_size)
-        mean = mean[:, border: border + crop_size,
-                    border: border + crop_size].astype('float32')
+        mean = mean[:, border:border + crop_size, border:border +
+                    crop_size].astype('float32')
     else:
-        assert(mean_img_size * mean_img_size == mean.shape[0])
+        assert (mean_img_size * mean_img_size == mean.shape[0])
         mean = mean.reshape(mean_img_size, mean_img_size)
-        mean = mean[border: border + crop_size,
-                    border: border + crop_size].astype('float32')
+        mean = mean[border:border + crop_size, border:border +
+                    crop_size].astype('float32')
     return mean
 
+
 def load_image(img_path, is_color=True):
     """
     Load image and return.
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
     img.load()
     return img
 
+
 def oversample(img, crop_dims):
     """
     image : iterable of (H x W x K) ndarrays
@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
         for j in w_indices:
             crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
             curr += 1
-    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
-        -crop_dims / 2.0,
-        crop_dims / 2.0
-    ])
+    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+        [-crop_dims / 2.0, crop_dims / 2.0])
     crops_ix = np.tile(crops_ix, (2, 1))
 
     # Extract crops
-    crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
-                      im_shape[-1]), dtype=np.float32)
+    crops = np.empty(
+        (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+        dtype=np.float32)
     ix = 0
     for im in img:
         for crop in crops_ix:
             crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
             ix += 1
-        crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]  # flip for mirrors
+        crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :]  # flip for mirrors
     return crops
 
+
 class ImageTransformer:
-    def __init__(self, transpose = None,
-                 channel_swap = None, mean = None, is_color = True):
+    def __init__(self,
+                 transpose=None,
+                 channel_swap=None,
+                 mean=None,
+                 is_color=True):
         self.transpose = transpose
         self.channel_swap = None
         self.mean = None
-        self.is_color = is_color
+        self.is_color = is_color
 
-    def set_transpose(self, order):
+    def set_transpose(self, order):
         if self.is_color:
-            assert 3 == len(order)
+            assert 3 == len(order)
         self.transpose = order
 
-    def set_channel_swap(self, order):
+    def set_channel_swap(self, order):
         if self.is_color:
-            assert 3 == len(order)
+            assert 3 == len(order)
         self.channel_swap = order
 
     def set_mean(self, mean):
         # mean value, may be one value per channel
         if mean.ndim == 1:
-           mean = mean[:, np.newaxis, np.newaxis]
-       else:
+            mean = mean[:, np.newaxis, np.newaxis]
+        else:
             # elementwise mean
             if self.is_color:
                 assert len(mean.shape) == 3
-       self.mean = mean
+        self.mean = mean
 
     def transformer(self, data):
         if self.transpose is not None:
diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 5d9e93265867389ca6d2aa26e48fcfa08561e6ae..6a47bd5851c99635dd7d3f1d5df67dd081ca4584 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os,sys
+import os, sys
 import numpy as np
 import logging
 from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
 
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 
+
 class ImageClassifier():
     def __init__(self,
                  train_conf,
@@ -58,18 +60,19 @@ class ImageClassifier():
         self.oversample = oversample
         self.is_color = is_color
 
-        self.transformer = image_util.ImageTransformer(is_color = is_color)
-        self.transformer.set_transpose((2,0,1))
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
+        self.transformer.set_transpose((2, 0, 1))
 
         self.mean_file = mean_file
         mean = np.load(self.mean_file)['data_mean']
         mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-        self.transformer.set_mean(mean) # mean pixel
+        self.transformer.set_mean(mean)  # mean pixel
         gpu = 1 if use_gpu else 0
         conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
         conf = parse_config(train_conf, conf_args)
         swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
         assert isinstance(self.network, swig_paddle.GradientMachine)
         self.network.loadParameters(self.model_dir)
 
@@ -90,14 +93,14 @@ class ImageClassifier():
             # image_util.resize_image: short side is self.resize_dim
             image = image_util.resize_image(image, self.resize_dim)
             image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
             input[0] = image.astype(np.float32)
             input = image_util.oversample(input, self.crop_dims)
         else:
             image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
             input[0] = np.array(image).astype(np.float32)
 
         data_in = []
@@ -133,22 +136,24 @@ class ImageClassifier():
         lab = np.argsort(-prob)
         logging.info("Label of %s is: %d", image, lab[0])
 
+
 if __name__ == '__main__':
-    image_size=32
-    crop_size=32
-    multi_crop=True
-    config="vgg_16_cifar.py"
-    output_layer="__fc_layer_1__"
-    mean_path="data/cifar-out/batches/batches.meta"
-    model_path=sys.argv[1]
-    image=sys.argv[2]
-    use_gpu=bool(int(sys.argv[3]))
-
-    obj = ImageClassifier(train_conf=config,
-                          model_dir=model_path,
-                          resize_dim=image_size,
-                          crop_dim=crop_size,
-                          mean_file=mean_path,
-                          use_gpu=use_gpu,
-                          oversample=multi_crop)
+    image_size = 32
+    crop_size = 32
+    multi_crop = True
+    config = "vgg_16_cifar.py"
+    output_layer = "__fc_layer_1__"
+    mean_path = "data/cifar-out/batches/batches.meta"
+    model_path = sys.argv[1]
+    image = sys.argv[2]
+    use_gpu = bool(int(sys.argv[3]))
+
+    obj = ImageClassifier(
+        train_conf=config,
+        model_dir=model_path,
+        resize_dim=image_size,
+        crop_dim=crop_size,
+        mean_file=mean_path,
+        use_gpu=use_gpu,
+        oversample=multi_crop)
     obj.predict(image, output_layer)
diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py
index fe7ea19bf02776629dff0f64f5b671dc457eae64..10b9c1691b5e51273c73a975545cd36f3822e901 100755
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@@ -19,24 +19,36 @@ from optparse import OptionParser
 
 def option_parser():
     parser = OptionParser(usage="usage: python preprcoess.py "\
                           "-i data_dir [options]")
-    parser.add_option("-i", "--input", action="store",
-                      dest="input", help="Input data directory.")
-    parser.add_option("-s", "--size", action="store",
-                      dest="size", help="Processed image size.")
-    parser.add_option("-c", "--color", action="store",
-                      dest="color", help="whether to use color images.")
+    parser.add_option(
+        "-i",
+        "--input",
+        action="store",
+        dest="input",
+        help="Input data directory.")
+    parser.add_option(
+        "-s",
+        "--size",
+        action="store",
+        dest="size",
+        help="Processed image size.")
+    parser.add_option(
+        "-c",
+        "--color",
+        action="store",
+        dest="color",
+        help="whether to use color images.")
     return parser.parse_args()
 
+
 if __name__ == '__main__':
-    options, args = option_parser()
-    data_dir = options.input
-    processed_image_size = int(options.size)
-    color = options.color == "1"
-    data_creator = ImageClassificationDatasetCreater(data_dir,
-                                                     processed_image_size,
-                                                     color)
-    data_creator.train_list_name = "train.txt"
-    data_creator.test_list_name = "test.txt"
-    data_creator.num_per_batch = 1000
-    data_creator.overwrite = True
-    data_creator.create_batches()
+    options, args = option_parser()
+    data_dir = options.input
+    processed_image_size = int(options.size)
+    color = options.color == "1"
+    data_creator = ImageClassificationDatasetCreater(
+        data_dir, processed_image_size, color)
+    data_creator.train_list_name = "train.txt"
+    data_creator.test_list_name = "test.txt"
+    data_creator.num_per_batch = 1000
+    data_creator.overwrite = True
+    data_creator.create_batches()
diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py
index edd6988c48acd6b554e09b721c37b291e21f46eb..58ceff5fc2f46cac9997b6d8af2b0db0c43e0c75 100755
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
 
 ####################Data Configuration ##################
 if not is_predict:
-    data_dir='data/cifar-out/batches/'
-    meta_path=data_dir+'batches.meta'
-
-    args = {'meta':meta_path,'mean_img_size': 32,
-            'img_size': 32,'num_classes': 10,
-            'use_jpeg': 1,'color': "color"}
-
-    define_py_data_sources2(train_list="train.list",
-                            test_list="train.list",
-                            module='image_provider',
-                            obj='processData',
-                            args=args)
+    data_dir = 'data/cifar-out/batches/'
+    meta_path = data_dir + 'batches.meta'
+
+    args = {
+        'meta': meta_path,
+        'mean_img_size': 32,
+        'img_size': 32,
+        'num_classes': 10,
+        'use_jpeg': 1,
+        'color': "color"
+    }
+
+    define_py_data_sources2(
+        train_list="train.list",
+        test_list="train.list",
+        module='image_provider',
+        obj='processData',
+        args=args)
 
 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
-    learning_rate = 0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))
 
 #######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
-                 size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
 # small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
-                    num_channels=3,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
 if not is_predict:
     lbl = data_layer(name="label", size=label_size)
diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py
index be8c0bc89156cf843d9b08276b52f92a4d8c9706..8515022e18dc6bbf055e6db3121568acf1df1c55 100644
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@@ -15,10 +15,10 @@
 from paddle.trainer.PyDataProvider2 import *
 import random
 
+
 # define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
 def process(settings, input_file):
     for i in xrange(2000):
         x = random.random()
-        yield [x], [2*x+0.3]
-
+        yield [x], [2 * x + 0.3]
diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py
index 8cfb843c42105757b0f63c4a00d034b47a37a0bb..ca4a1872731abde90e72cb167929b3d9e2e1ebf4 100755
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@@ -23,14 +23,17 @@ Usage:
 import numpy as np
 import os
 
+
 def load(file_name):
     with open(file_name, 'rb') as f:
-        f.read(16) # skip header for float type.
+        f.read(16)  # skip header for float type.
         return np.fromfile(f, dtype=np.float32)
 
+
 def main():
     print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
-                                           load('output/pass-00029/b'))
+                                          load('output/pass-00029/b'))
+
 
 if __name__ == '__main__':
     main()
diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py
index 3e3df5583282a4335ddea7b1cb30a84052d0adca..7c838c1a8f5b3cb6ac732197c85cd7c728eb013f 100644
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *
 
 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None,
-                        module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+    f.writelines(' ')
+define_py_data_sources2(
+    train_list=data_file,
+    test_list=None,
+    module='dataprovider',
+    obj='process',
+    args={})
 
 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+    input=x,
+    param_attr=ParamAttr(name='w'),
+    size=1,
+    act=LinearActivation(),
+    bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
-
diff --git a/demo/mnist/data/generate_list.py b/demo/mnist/data/generate_list.py
index 1b929048b4d82b5e9d80585b6d0180f2e92200ce..d880721f94c68bbbc1740f82872462efdb368fa2 100644
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@@ -13,9 +13,9 @@
 # limitations under the License.
o = open("./" + "train.list", "w") -o.write("./data/raw_data/train" +"\n") +o.write("./data/raw_data/train" + "\n") o.close() o = open("./" + "test.list", "w") -o.write("./data/raw_data/t10k" +"\n") -o.close() \ No newline at end of file +o.write("./data/raw_data/t10k" + "\n") +o.close() diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py index 32af29730a7365df1a98fe54a2edf8850ee93e8d..6df4676da3bdc2e6949cc911fa3720cb51ddc568 100644 --- a/demo/mnist/mnist_provider.py +++ b/demo/mnist/mnist_provider.py @@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import * # Define a py data provider -@provider(input_types={ - 'pixel': dense_vector(28 * 28), - 'label': integer_value(10) -}) +@provider( + input_types={'pixel': dense_vector(28 * 28), + 'label': integer_value(10)}) def process(settings, filename): # settings is not used currently. imgf = filename + "-images-idx3-ubyte" labelf = filename + "-labels-idx1-ubyte" diff --git a/demo/mnist/vgg_16_mnist.py b/demo/mnist/vgg_16_mnist.py index 45a45bb061aa781231a944bb82ebfbc6b0dc9618..f9e89bc588abacd98a8f5fc82a00fae6bb2de10e 100644 --- a/demo/mnist/vgg_16_mnist.py +++ b/demo/mnist/vgg_16_mnist.py @@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False) ####################Data Configuration ################## - if not is_predict: - data_dir='./data/' - define_py_data_sources2(train_list= data_dir + 'train.list', - test_list= data_dir + 'test.list', - module='mnist_provider', - obj='process') + data_dir = './data/' + define_py_data_sources2( + train_list=data_dir + 'train.list', + test_list=data_dir + 'test.list', + module='mnist_provider', + obj='process') ######################Algorithm Configuration ############# settings( - batch_size = 128, - learning_rate = 0.1 / 128.0, - learning_method = MomentumOptimizer(0.9), - regularization = L2Regularization(0.0005 * 128) -) + batch_size=128, + learning_rate=0.1 / 128.0, + learning_method=MomentumOptimizer(0.9), + regularization=L2Regularization(0.0005 * 128)) #######################Network Configuration ############# -data_size=1*28*28 -label_size=10 +data_size = 1 * 28 * 28 +label_size = 10 img = data_layer(name='pixel', size=data_size) # small_vgg is predined in trainer_config_helpers.network -predict = small_vgg(input_image=img, - num_channels=1, - num_classes=label_size) +predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size) if not is_predict: lbl = data_layer(name="label", size=label_size) diff --git a/demo/model_zoo/embedding/extract_para.py b/demo/model_zoo/embedding/extract_para.py index 17067792fc38d0d25bd28dc35bfb1b88ad5020cd..47e06fae9caa9c3d9e0d6eb2e3f6633a776c5b1d 100755 --- a/demo/model_zoo/embedding/extract_para.py +++ b/demo/model_zoo/embedding/extract_para.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Example: python extract_para.py --preModel PREMODEL --preDict PREDICT \ @@ -29,6 +28,7 @@ Options: from optparse import OptionParser import struct + def get_row_index(preDict, usrDict): """ Get the row positions for all words in user dictionary from pre-trained dictionary. 
@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
             pos.append(index[word])
     return pos
 
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+                                  paraDim):
     """
     Extract desired parameters from a pretrained embedding model based on user dictionary
     """
@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
     print "extract parameters finish, total", len(rowIndex), "lines"
     fi.close()
 
+
 def main():
     """
     Main entry for running paraconvert.py
@@ -78,19 +81,33 @@ def main():
         "python %prog --preModel PREMODEL --preDict PREDICT" \
         " --usrModel USRMODEL --usrDict USRDICT -d DIM"
     parser = OptionParser(usage)
-    parser.add_option("--preModel", action="store", dest="preModel",
-                      help="the name of pretrained embedding model")
-    parser.add_option("--preDict", action="store", dest="preDict",
-                      help="the name of pretrained dictionary")
-    parser.add_option("--usrModel", action="store", dest="usrModel",
-                      help="the name of output usr embedding model")
-    parser.add_option("--usrDict", action="store", dest="usrDict",
-                      help="the name of user specified dictionary")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "--preModel",
+        action="store",
+        dest="preModel",
+        help="the name of pretrained embedding model")
+    parser.add_option(
+        "--preDict",
+        action="store",
+        dest="preDict",
+        help="the name of pretrained dictionary")
+    parser.add_option(
+        "--usrModel",
+        action="store",
+        dest="usrModel",
+        help="the name of output usr embedding model")
+    parser.add_option(
+        "--usrDict",
+        action="store",
+        dest="usrDict",
+        help="the name of user specified dictionary")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
     (options, args) = parser.parse_args()
-    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                                  options.usrModel, options.usrDict, int(options.dim))
+    extract_parameters_by_usrDict(options.preModel, options.preDict,
+                                  options.usrModel, options.usrDict,
+                                  int(options.dim))
+
 
 if __name__ == '__main__':
     main()
diff --git a/demo/model_zoo/embedding/paraconvert.py b/demo/model_zoo/embedding/paraconvert.py
index 523412303617a38035392e4bb99f8ce119be8ac8..54155eff8e26b16ff5303d8d279e81b4bf8a90f4 100755
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Example:
     python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct
 
+
 def binary2text(input, output, paraDim):
     """
     Convert a binary parameter file of embedding model to be a text file.
@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
     fo.close()
     print "binary2text finish, total", line, "lines"
 
+
 def get_para_count(input):
     """
     Compute the total number of embedding parameters in input text file.
     input: the name of input text file
     """
-    numRows = 1
+    numRows = 1
     paraDim = 0
     with open(input) as f:
         line = f.readline()
@@ -90,6 +91,7 @@ def get_para_count(input):
         numRows += 1
     return numRows * paraDim
 
+
 def text2binary(input, output, paddle_head=True):
     """
     Convert a text parameter file of embedding model to be a binary file.
@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
     fo.close()
     print "text2binary finish, total", count, "lines"
 
+
 def main():
     """
     Main entry for running paraconvert.py
@@ -131,21 +134,26 @@ def main():
         "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
         "python %prog --t2b -i INPUT -o OUTPUT"
     parser = OptionParser(usage)
-    parser.add_option("--b2t", action="store_true",
-                      help="convert parameter file of embedding model from binary to text")
-    parser.add_option("--t2b", action="store_true",
-                      help="convert parameter file of embedding model from text to binary")
-    parser.add_option("-i", action="store", dest="input",
-                      help="input parameter file name")
-    parser.add_option("-o", action="store", dest="output",
-                      help="output parameter file name")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "--b2t",
+        action="store_true",
+        help="convert parameter file of embedding model from binary to text")
+    parser.add_option(
+        "--t2b",
+        action="store_true",
+        help="convert parameter file of embedding model from text to binary")
+    parser.add_option(
+        "-i", action="store", dest="input", help="input parameter file name")
+    parser.add_option(
+        "-o", action="store", dest="output", help="output parameter file name")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
     (options, args) = parser.parse_args()
     if options.b2t:
         binary2text(options.input, options.output, options.dim)
     if options.t2b:
         text2binary(options.input, options.output)
+
 
 if __name__ == '__main__':
     main()
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 06d471722f8059804a59e6823bebccff85a8d542..7855126edcfec20de251e5bc08c08c7aab8f7a8e 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
 
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 
+
 class ImageClassifier():
-    def __init__(self, train_conf, model_dir=None,
-                 resize_dim=256, crop_dim=224,
+    def __init__(self,
+                 train_conf,
+                 model_dir=None,
+                 resize_dim=256,
+                 crop_dim=224,
                  use_gpu=True,
                  mean_file=None,
                  output_layer=None,
-                 oversample=False, is_color=True):
+                 oversample=False,
+                 is_color=True):
         """
         train_conf: network configure.
         model_dir: string, directory of model.
@@ -62,24 +68,25 @@ class ImageClassifier():
         assert isinstance(self.output_layer, basestring)
         self.output_layer = self.output_layer.split(",")
 
-        self.transformer = image_util.ImageTransformer(is_color = is_color)
-        self.transformer.set_transpose((2,0,1))
-        self.transformer.set_channel_swap((2,1,0))
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
+        self.transformer.set_transpose((2, 0, 1))
+        self.transformer.set_channel_swap((2, 1, 0))
 
         self.mean_file = mean_file
         if self.mean_file is not None:
             mean = np.load(self.mean_file)['data_mean']
             mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-            self.transformer.set_mean(mean) # mean pixel
+            self.transformer.set_mean(mean)  # mean pixel
         else:
             # if you use three mean value, set like:
             # this three mean value is calculated from ImageNet.
-            self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
 
         conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
         conf = parse_config(train_conf, conf_args)
         swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
         assert isinstance(self.network, swig_paddle.GradientMachine)
         self.network.loadParameters(self.model_dir)
 
@@ -105,14 +112,14 @@ class ImageClassifier():
             # image_util.resize_image: short side is self.resize_dim
             image = image_util.resize_image(image, self.resize_dim)
             image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
             input[0] = image.astype(np.float32)
             input = image_util.oversample(input, self.crop_dims)
         else:
             image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
             input[0] = np.array(image).astype(np.float32)
 
         data_in = []
@@ -172,7 +179,7 @@ class ImageClassifier():
             logging.info("Label of %s is: %d", image, lab[0])
         return results
 
-    def extract(self, data_file, output_dir, batch_size = 10000):
+    def extract(self, data_file, output_dir, batch_size=10000):
         """
         extract and save features of output layers, which are specify in Outputs()
         in network configure.
@@ -197,7 +204,7 @@ class ImageClassifier():
                 image_feature[file_name] = feature
                 sample_num += 1
                 if sample_num == batch_size:
-                    batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+                    batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
                     self.save_file(image_feature, batch_name)
                     logging.info('Finish batch %d', batch_num)
                     batch_num += 1
@@ -206,7 +213,7 @@ class ImageClassifier():
             if idx % 1000 == 0:
                 logging.info('%d/%d, %s', idx, len(image_files), file_name)
         if sample_num > 0:
-            batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+            batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
             self.save_file(image_feature, batch_name)
             logging.info('Finish batch %d', batch_num)
         logging.info('Done: make image feature batch')
@@ -215,38 +222,64 @@ class ImageClassifier():
         of = open(file, 'wb')
         cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
 
+
 def option_parser():
     """
     Main entry for predciting
     """
     usage = "%prog -c config -i data_list -w model_dir [options]"
     parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option("-j", "--job",
-                      action="store", dest="job_type",
-                      help="job type: predict, extract\
-                            predict: predicting,\
-                            extract: extract features")
-    parser.add_option("-c", "--conf",
-                      action="store", dest="train_conf",
-                      help="network config")
-    parser.add_option("-i", "--data",
-                      action="store", dest="data_file",
-                      help="image list")
-    parser.add_option("-w", "--model",
-                      action="store", dest="model_path",
-                      default=None, help="model path")
-    parser.add_option("-g", "--use_gpu", action="store",
-                      dest="use_gpu", default=True,
-                      help="Whether to use gpu mode.")
-    parser.add_option("-o", "--output_dir",
-                      action="store", dest="output_dir",
-                      default="output", help="output path")
-    parser.add_option("-m", "--mean", action="store",
-                      dest="mean", default=None,
-                      help="mean file.")
-    parser.add_option("-p", "--multi_crop", action="store_true",
-                      dest="multi_crop", default=False,
-                      help="Wether to use multiple crops on image.")
+    parser.add_option(
+        "-j",
+        "--job",
+        action="store",
+        dest="job_type",
+        help="job type: predict, extract\
+                            predict: predicting,\
+                            extract: extract features")
+    parser.add_option(
+        "-c",
+        "--conf",
+        action="store",
+        dest="train_conf",
+        help="network config")
+    parser.add_option(
+        "-i", "--data", action="store", dest="data_file", help="image list")
+    parser.add_option(
+        "-w",
+        "--model",
+        action="store",
+        dest="model_path",
+        default=None,
+        help="model path")
+    parser.add_option(
+        "-g",
+        "--use_gpu",
+        action="store",
+        dest="use_gpu",
+        default=True,
+        help="Whether to use gpu mode.")
+    parser.add_option(
+        "-o",
+        "--output_dir",
+        action="store",
+        dest="output_dir",
+        default="output",
+        help="output path")
+    parser.add_option(
+        "-m",
+        "--mean",
+        action="store",
+        dest="mean",
+        default=None,
+        help="mean file.")
+    parser.add_option(
+        "-p",
+        "--multi_crop",
+        action="store_true",
+        dest="multi_crop",
+        default=False,
+        help="Whether to use multiple crops on image.")
     parser.add_option("-l", "--output_layer", action="store",
                       dest="output_layer", default=None,
                       help="--job=extract, specify layers to extract "\
@@ -254,24 +287,26 @@ def option_parser():
                            "classification probability, output in resnet.py.")
     return parser.parse_args()
 
+
 def main():
     """
     1. parse input arguments.
     2. predicting or extract features according job type.
     """
     options, args = option_parser()
-    obj = ImageClassifier(options.train_conf,
-                          options.model_path,
-                          use_gpu=options.use_gpu,
-                          mean_file=options.mean,
-                          output_layer=options.output_layer,
-                          oversample=options.multi_crop)
+    obj = ImageClassifier(
+        options.train_conf,
+        options.model_path,
+        use_gpu=options.use_gpu,
+        mean_file=options.mean,
+        output_layer=options.output_layer,
+        oversample=options.multi_crop)
     if options.job_type == "predict":
         obj.predict(options.data_file)
     elif options.job_type == "extract":
-        obj.extract(options.data_file,
-                    options.output_dir)
+        obj.extract(options.data_file, options.output_dir)
+
 
 if __name__ == '__main__':
     main()
diff --git a/demo/model_zoo/resnet/example/__init__.py b/demo/model_zoo/resnet/example/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/demo/model_zoo/resnet/example/__init__.py
+++ b/demo/model_zoo/resnet/example/__init__.py
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
diff --git a/demo/model_zoo/resnet/example/image_list_provider.py b/demo/model_zoo/resnet/example/image_list_provider.py
index ee457e1fffc7ed8629dc6bde63a8047818c0ff9d..9e415f76a53326c5809b7a8c508701c519ab443b 100644
--- a/demo/model_zoo/resnet/example/image_list_provider.py
+++ b/demo/model_zoo/resnet/example/image_list_provider.py
@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
 from paddle.trainer.PyDataProvider2 import *
 
 
-def hook(settings, image_size, crop_size, color, file_list,
-         is_train, **kwargs):
+def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
     """
     Description: Init with a list of data file
     file_list is the name list of input files.
@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
         sz = settings.crop_size * settings.crop_size
         settings.img_mean = np.zeros(sz * 3, dtype=np.single)
         for idx, value in enumerate(settings.mean_value):
-            settings.img_mean[idx * sz: (idx + 1) * sz] = value
+            settings.img_mean[idx * sz:(idx + 1) * sz] = value
         settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
                                                       settings.crop_size)
 
@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
 
     settings.input_types = [
         dense_vector(settings.img_input_size),  # image feature
-        integer_value(1)]  # labels
+        integer_value(1)
+    ]  # labels
 
     settings.logger.info('Image short side: %s', settings.img_size)
     settings.logger.info('Crop size: %s', settings.crop_size)
@@ -97,9 +97,6 @@ def processData(settings, file_list):
                 # swap channel
                 if settings.is_swap_channel:
                     img = img[settings.swap_channel, :, :]
-                img_feat = preprocess_img(img,
-                                          settings.img_mean,
-                                          settings.crop_size,
-                                          settings.is_train,
-                                          settings.color)
+                img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
+                                          settings.is_train, settings.color)
                 yield img_feat.tolist(), int(lab.strip())
diff --git a/demo/model_zoo/resnet/load_feature.py b/demo/model_zoo/resnet/load_feature.py
index ee4930b7a17f7f21ceeba8db253eed64416ebf10..b0948b75fd0ac9a3fa89070aed04d523ce286f4e 100644
--- a/demo/model_zoo/resnet/load_feature.py
+++ b/demo/model_zoo/resnet/load_feature.py
@@ -17,9 +17,11 @@ import sys
 import cPickle
 import logging
 
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 
+
 def load_feature_c(file):
     """
     Load feature extracted by C++ interface.
@@ -30,14 +32,15 @@ def load_feature_c(file):
     f = open(file, 'r')
     for line in f:
         sample = []
-        for slot in line.strip().split(";"):
-            fea = [float(val) for val in slot.strip().split()]
+        for slot in line.strip().split(";"):
+            fea = [float(val) for val in slot.strip().split()]
             if fea:
                 sample.append(fea)
         features.append(sample)
     f.close()
     return features
 
+
 def load_feature_py(feature_dir):
     """
     Load feature extracted by python interface.
@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
             logging.info('Load feature file %s', file_name)
     return features
 
+
 if __name__ == '__main__':
-    print load_feature_py(sys.argv[1])
+    print load_feature_py(sys.argv[1])
     #print load_feature_c(sys.argv[1])
diff --git a/demo/model_zoo/resnet/resnet.py b/demo/model_zoo/resnet/resnet.py
index 483e308ac804e13ca249ef4e47e9e9b00770ce1b..015b74cd484596039b9fcf010576ca340d044db7 100644
--- a/demo/model_zoo/resnet/resnet.py
+++ b/demo/model_zoo/resnet/resnet.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 from paddle.trainer_config_helpers import *
-
 """
 paper: https://arxiv.org/abs/1512.03385
 """
@@ -28,15 +27,19 @@ if not is_predict and data_provider:
     # mean.meta size : 3 x 224 x 224.
     # If you use three mean value, set like:
     # "mean_value:103.939,116.779,123.68;"
-    args={
+    args = {
         'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224, 'crop_size': 224,
-        'color': True,'swap_channel:': [2, 1, 0]}
-    define_py_data_sources2(train_list,
-                            'example/test.list',
-                            module="example.image_list_provider",
-                            obj="processData",
-                            args=args)
+        'image_size': 224,
+        'crop_size': 224,
+        'color': True,
+        'swap_channel:': [2, 1, 0]
+    }
+    define_py_data_sources2(
+        train_list,
+        'example/test.list',
+        module="example.image_list_provider",
+        obj="processData",
+        args=args)
 
 batch_size = 1
 learning_rate = 0.1 / batch_size
@@ -54,12 +57,16 @@ Settings(
     learning_method='momentum',
     learning_rate_decay_a=0.5,
     learning_rate_decay_b=1200000 * 10,
-    learning_rate_schedule="discexp",
-)
+    learning_rate_schedule="discexp", )
 
 
-def conv_bn_layer(name, input, filter_size, num_filters,
-                  stride, padding, channels=None,
+def conv_bn_layer(name,
+                  input,
+                  filter_size,
+                  num_filters,
+                  stride,
+                  padding,
+                  channels=None,
                   active_type=ReluActivation()):
     """
     A wrapper for conv layer with batch normalization layers.
@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
     conv layer has no activation.
     """
 
-    tmp = img_conv_layer(name=name + "_conv",
-                         input=input,
-                         filter_size=filter_size,
-                         num_channels=channels,
-                         num_filters=num_filters,
-                         stride=stride,
-                         padding=padding,
-                         act=LinearActivation(),
-                         bias_attr=False)
-    return batch_norm_layer(name=name + "_bn",
-                            input=tmp,
-                            act=active_type,
-                            use_global_stats=is_test)
+    tmp = img_conv_layer(
+        name=name + "_conv",
+        input=input,
+        filter_size=filter_size,
+        num_channels=channels,
+        num_filters=num_filters,
+        stride=stride,
+        padding=padding,
+        act=LinearActivation(),
+        bias_attr=False)
+    return batch_norm_layer(
+        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
 
 
 def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
     Last conv_bn_layer has no activation.
     Addto layer has activation of relu.
     """
-    last_name = conv_bn_layer(name=name + '_branch2a',
-                              input=input,
-                              filter_size=1,
-                              num_filters=num_filters1,
-                              stride=1,
-                              padding=0)
-    last_name = conv_bn_layer(name=name + '_branch2b',
-                              input=last_name,
-                              filter_size=3,
-                              num_filters=num_filters1,
-                              stride=1,
-                              padding=1)
-    last_name = conv_bn_layer(name=name + '_branch2c',
-                              input=last_name,
-                              filter_size=1,
-                              num_filters=num_filters2,
-                              stride=1,
-                              padding=0,
-                              active_type=LinearActivation())
-
-    return addto_layer(name=name + "_addto",
-                       input=[input, last_name],
-                       act=ReluActivation())
+    last_name = conv_bn_layer(
+        name=name + '_branch2a',
+        input=input,
+        filter_size=1,
+        num_filters=num_filters1,
+        stride=1,
+        padding=0)
+    last_name = conv_bn_layer(
+        name=name + '_branch2b',
+        input=last_name,
+        filter_size=3,
+        num_filters=num_filters1,
+        stride=1,
+        padding=1)
+    last_name = conv_bn_layer(
+        name=name + '_branch2c',
+        input=last_name,
+        filter_size=1,
+        num_filters=num_filters2,
+        stride=1,
+        padding=0,
+        active_type=LinearActivation())
+
+    return addto_layer(
+        name=name + "_addto", input=[input, last_name], act=ReluActivation())
 
 
 def mid_projection(name, input, num_filters1, num_filters2, stride=2):
@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
     branch2x: bottleneck building block, shortcuts are identity.
""" # stride = 2 - branch1 = conv_bn_layer(name=name + '_branch1', - input=input, - filter_size=1, - num_filters=num_filters2, - stride=stride, - padding=0, - active_type=LinearActivation()) - - last_name = conv_bn_layer(name=name + '_branch2a', - input=input, - filter_size=1, - num_filters=num_filters1, - stride=stride, - padding=0) - last_name = conv_bn_layer(name=name + '_branch2b', - input=last_name, - filter_size=3, - num_filters=num_filters1, - stride=1, - padding=1) - - last_name = conv_bn_layer(name=name + '_branch2c', - input=last_name, - filter_size=1, - num_filters=num_filters2, - stride=1, - padding=0, - active_type=LinearActivation()) - - return addto_layer(name=name + "_addto", - input=[branch1, last_name], - act=ReluActivation()) + branch1 = conv_bn_layer( + name=name + '_branch1', + input=input, + filter_size=1, + num_filters=num_filters2, + stride=stride, + padding=0, + active_type=LinearActivation()) + + last_name = conv_bn_layer( + name=name + '_branch2a', + input=input, + filter_size=1, + num_filters=num_filters1, + stride=stride, + padding=0) + last_name = conv_bn_layer( + name=name + '_branch2b', + input=last_name, + filter_size=3, + num_filters=num_filters1, + stride=1, + padding=1) + + last_name = conv_bn_layer( + name=name + '_branch2c', + input=last_name, + filter_size=1, + num_filters=num_filters2, + stride=1, + padding=0, + active_type=LinearActivation()) + + return addto_layer( + name=name + "_addto", input=[branch1, last_name], act=ReluActivation()) def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): @@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): # For ImageNet # conv1: 112x112 img = data_layer(name='input', size=224 * 224 * 3) - tmp = conv_bn_layer("conv1", img, - filter_size=7, - channels=3, - num_filters=64, - stride=2, - padding=3) + tmp = conv_bn_layer( + "conv1", + img, + filter_size=7, + channels=3, + num_filters=64, + stride=2, + padding=3) tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2) # conv2_x: 56x56 - tmp = mid_projection(name="res2_1", - input=tmp, - num_filters1=64, - num_filters2=256, - stride=1) + tmp = mid_projection( + name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1) for i in xrange(2, res2_num + 1, 1): - tmp = bottleneck_block(name="res2_" + str(i), - input=tmp, - num_filters1=64, - num_filters2=256) + tmp = bottleneck_block( + name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256) # conv3_x: 28x28 - tmp = mid_projection(name="res3_1", - input=tmp, - num_filters1=128, - num_filters2=512) + tmp = mid_projection( + name="res3_1", input=tmp, num_filters1=128, num_filters2=512) for i in xrange(2, res3_num + 1, 1): - tmp = bottleneck_block(name="res3_" + str(i), - input=tmp, num_filters1=128, - num_filters2=512) + tmp = bottleneck_block( + name="res3_" + str(i), + input=tmp, + num_filters1=128, + num_filters2=512) # conv4_x: 14x14 - tmp = mid_projection(name="res4_1", input=tmp, - num_filters1=256, num_filters2=1024) + tmp = mid_projection( + name="res4_1", input=tmp, num_filters1=256, num_filters2=1024) for i in xrange(2, res4_num + 1, 1): - tmp = bottleneck_block(name="res4_" + str(i), - input=tmp, - num_filters1=256, - num_filters2=1024) + tmp = bottleneck_block( + name="res4_" + str(i), + input=tmp, + num_filters1=256, + num_filters2=1024) # conv5_x: 7x7 - tmp = mid_projection(name="res5_1", input=tmp, - num_filters1=512, num_filters2=2048) + tmp = mid_projection( + name="res5_1", input=tmp, num_filters1=512, num_filters2=2048) for 
i in xrange(2, res5_num + 1, 1): - tmp = bottleneck_block(name="res5_" + str(i), - input=tmp, num_filters1=512, - num_filters2=2048) - - tmp = img_pool_layer(name='avgpool', - input=tmp, - pool_size=7, - stride=1, - pool_type=AvgPooling()) - - output = fc_layer(name='output', - input=tmp, - size=1000, - act=SoftmaxActivation()) + tmp = bottleneck_block( + name="res5_" + str(i), + input=tmp, + num_filters1=512, + num_filters2=2048) + + tmp = img_pool_layer( + name='avgpool', + input=tmp, + pool_size=7, + stride=1, + pool_type=AvgPooling()) + + output = fc_layer( + name='output', input=tmp, size=1000, act=SoftmaxActivation()) if not is_predict: - classification_cost(input=output, label=data_layer(name='label', - size=1)) + classification_cost( + input=output, label=data_layer( + name='label', size=1)) def res_net_50(): diff --git a/demo/quick_start/api_train.py b/demo/quick_start/api_train.py index 5ae19b8d26534a9521a6da7630796edce36780e7..66cbb856484d231613a0026be129a7bc3a7cfdf5 100644 --- a/demo/quick_start/api_train.py +++ b/demo/quick_start/api_train.py @@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter from paddle.trainer.PyDataProvider2 \ import integer_value, integer_value_sequence, sparse_binary_vector + def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--train_data", - type=str, required=False, help="train data file") + parser.add_argument( + "--train_data", type=str, required=False, help="train data file") parser.add_argument("--test_data", type=str, help="test data file") - parser.add_argument("--config", - type=str, required=True, help="config file name") + parser.add_argument( + "--config", type=str, required=True, help="config file name") parser.add_argument("--dict_file", required=True, help="dictionary file") - parser.add_argument("--seq", - default=1, type=int, - help="whether use sequence training") - parser.add_argument("--use_gpu", default=0, type=int, - help="whether use GPU for training") - parser.add_argument("--trainer_count", default=1, type=int, - help="Number of threads for training") - parser.add_argument("--num_passes", default=5, type=int, - help="Number of training passes") + parser.add_argument( + "--seq", default=1, type=int, help="whether use sequence training") + parser.add_argument( + "--use_gpu", default=0, type=int, help="whether use GPU for training") + parser.add_argument( + "--trainer_count", + default=1, + type=int, + help="Number of threads for training") + parser.add_argument( + "--num_passes", default=5, type=int, help="Number of training passes") return parser.parse_args() + UNK_IDX = 0 + def load_data(file_name, word_dict): with open(file_name, 'r') as f: for line in f: @@ -51,6 +56,7 @@ def load_data(file_name, word_dict): word_slot = [word_dict.get(w, UNK_IDX) for w in words] yield word_slot, int(label) + def load_dict(dict_file): word_dict = dict() with open(dict_file, 'r') as f: @@ -59,6 +65,7 @@ def load_dict(dict_file): word_dict[w] = i return word_dict + def main(): options = parse_arguments() api.initPaddle("--use_gpu=%s" % options.use_gpu, @@ -86,9 +93,9 @@ def main(): # create a data converter which converts data to PaddlePaddle # internal format input_types = [ - integer_value_sequence(len(word_dict)) if options.seq - else sparse_binary_vector(len(word_dict)), - integer_value(2)] + integer_value_sequence(len(word_dict)) if options.seq else + sparse_binary_vector(len(word_dict)), integer_value(2) + ] converter = DataProviderConverter(input_types) batch_size = 
trainer_config.opt_config.batch_size @@ -102,7 +109,7 @@ def main(): trainer.trainOneDataBatch(size, converter(batch)) trainer.finishTrainPass() if test_dataset: - trainer.startTestPeriod(); + trainer.startTestPeriod() for pos in xrange(0, len(test_dataset), batch_size): batch = itertools.islice(test_dataset, pos, pos + batch_size) size = min(batch_size, len(test_dataset) - pos) @@ -110,5 +117,6 @@ def main(): trainer.finishTestPeriod() trainer.finishTrain() + if __name__ == '__main__': main() diff --git a/demo/quick_start/dataprovider_bow.py b/demo/quick_start/dataprovider_bow.py index f8cde189cf87d73aec05da4b34e064cddecff56b..a5156a2d40cc04c02e50d676045ae6da8937ba01 100644 --- a/demo/quick_start/dataprovider_bow.py +++ b/demo/quick_start/dataprovider_bow.py @@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import * # id of the word not in dictionary UNK_IDX = 0 + # initializer is called by the framework during initialization. # It allows the user to describe the data types and setup the # necessary data structure for later use. @@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs): # The second input is an integer. It represents the category id of the # sample. 2 means there are two labels in the dataset. # (1 for positive and 0 for negative) - integer_value(2)] + integer_value(2) + ] + # Delaring a data provider. It has an initializer 'data_initialzer'. # It will cache the generated data of the first pass in memory, so that @@ -69,9 +72,8 @@ def process(settings, file_name): def predict_initializer(settings, dictionary, **kwargs): settings.word_dict = dictionary - settings.input_types = [ - sparse_binary_vector(len(dictionary)) - ] + settings.input_types = [sparse_binary_vector(len(dictionary))] + # Declaring a data provider for prediction. The difference with process # is that label is not generated. diff --git a/demo/quick_start/dataprovider_emb.py b/demo/quick_start/dataprovider_emb.py index f5632d5f3f8bd8bb83b12198e7450b239eb1f7f6..286f3f5c82081f1a6e02a26023969790792a78a3 100755 --- a/demo/quick_start/dataprovider_emb.py +++ b/demo/quick_start/dataprovider_emb.py @@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs): # The value of the integers range from 0 to len(dictrionary)-1 integer_value_sequence(len(dictionary)), # Define the second input for label id - integer_value(2)] + integer_value(2) + ] @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) @@ -40,7 +41,8 @@ def process(settings, file_name): def predict_initializer(settings, dictionary, **kwargs): settings.word_dict = dictionary settings.input_types = [ - integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE) + integer_value( + len(dictionary), seq_type=SequenceType.SEQUENCE) ] diff --git a/demo/quick_start/preprocess.py b/demo/quick_start/preprocess.py index 69fdbe44b5245bc2855847a1507e6eaed517eb96..d87fad632a7429f7d9682badabe4c72ca127354f 100755 --- a/demo/quick_start/preprocess.py +++ b/demo/quick_start/preprocess.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ 1. (remove HTML before or not)tokensizing 2. pos sample : rating score 5; neg sample: rating score 1-2. 
@@ -35,7 +34,8 @@ import multiprocessing
 
 batch_size = 5000
 word_count = {}
-num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save
+num_tokenize = max(1,
+                   multiprocessing.cpu_count() - 2)  # parse + tokenize + save
 max_queue_size = 8
 parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
 tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/demo/quick_start/trainer_config.bidi-lstm.py
index 3be3d373422714c6b40e530cf112f9106b85d20b..51deaf31f94681b6b61f98f798cef14a65ec92cb 100644
--- a/demo/quick_start/trainer_config.bidi-lstm.py
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
-                        test_list=tst,
-                        module="dataprovider_emb",
-                        obj=process,
-                        args={"dictionary": word_dict})
+define_py_data_sources2(
+    train_list=trn,
+    test_list=tst,
+    module="dataprovider_emb",
+    obj=process,
+    args={"dictionary": word_dict})
 
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,19 +40,17 @@ settings(
     learning_rate=2e-3,
     learning_method=AdamOptimizer(),
     regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
-)
+    gradient_clipping_threshold=25)
 
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
 
 bi_lstm = bidirectional_lstm(input=emb, size=128)
 dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
 
-output = fc_layer(input=dropout, size=2,
-                  bias_attr=bias_attr,
-                  act=SoftmaxActivation())
+output = fc_layer(
+    input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
 
 if is_predict:
     maxid = maxid_layer(output)
diff --git a/demo/quick_start/trainer_config.cnn.py b/demo/quick_start/trainer_config.cnn.py
index 253ec0aee26cf42226d79726a75aad6c61c77565..388efa75f903e0c7c803c99cd50d73a004133a67 100644
--- a/demo/quick_start/trainer_config.cnn.py
+++ b/demo/quick_start/trainer_config.cnn.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
-                        test_list=tst,
-                        module="dataprovider_emb",
-                        obj=process,
-                        args={"dictionary": word_dict})
+define_py_data_sources2(
+    train_list=trn,
+    test_list=tst,
+    module="dataprovider_emb",
+    obj=process,
+    args={"dictionary": word_dict})
 
 batch_size = 128 if not is_predict else 1
 settings(
@@ -39,8 +40,7 @@ settings(
     learning_rate=2e-3,
     learning_method=AdamOptimizer(),
     regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
-)
+    gradient_clipping_threshold=25)
 
 data = data_layer(name="word", size=len(word_dict))
 embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.db-lstm.py b/demo/quick_start/trainer_config.db-lstm.py
index b35bdf5a61b4731cadb5eb992796c5e885efbd7e..02bc898d881efbd3bfaed95d45cd9e70ed046746 100644
--- a/demo/quick_start/trainer_config.db-lstm.py
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' process = 'process' if not is_predict else 'process_predict' -define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider_emb", - obj=process, - args={"dictionary": word_dict}) +define_py_data_sources2( + train_list=trn, + test_list=tst, + module="dataprovider_emb", + obj=process, + args={"dictionary": word_dict}) batch_size = 128 if not is_predict else 1 settings( @@ -39,10 +40,9 @@ settings( learning_rate=2e-3, learning_method=AdamOptimizer(), regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25 -) + gradient_clipping_threshold=25) -bias_attr = ParamAttr(initial_std=0.,l2_rate=0.) +bias_attr = ParamAttr(initial_std=0., l2_rate=0.) data = data_layer(name="word", size=len(word_dict)) emb = embedding_layer(input=data, size=128) @@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1)) input_layers = [hidden_0, lstm_0] -for i in range(1,8): +for i in range(1, 8): fc = fc_layer(input=input_layers, size=128) - lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1), - reverse=(i % 2) == 1,) + lstm = lstmemory( + input=fc, + layer_attr=ExtraAttr(drop_rate=0.1), + reverse=(i % 2) == 1, ) input_layers = [fc, lstm] lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling()) -output = fc_layer(input=lstm_last, size=2, - bias_attr=bias_attr, - act=SoftmaxActivation()) +output = fc_layer( + input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation()) if is_predict: maxid = maxid_layer(output) diff --git a/demo/quick_start/trainer_config.emb.py b/demo/quick_start/trainer_config.emb.py index 34dd7b96f2f142159472b98a09fd0092fac15e43..8fd18a7aac704e62b137845edb46cce5bc373285 100644 --- a/demo/quick_start/trainer_config.emb.py +++ b/demo/quick_start/trainer_config.emb.py @@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False) trn = 'data/train.list' if not is_predict else None tst = 'data/test.list' if not is_predict else 'data/pred.list' process = 'process' if not is_predict else 'process_predict' -define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider_emb", - obj=process, - args={"dictionary": word_dict}) +define_py_data_sources2( + train_list=trn, + test_list=tst, + module="dataprovider_emb", + obj=process, + args={"dictionary": word_dict}) batch_size = 128 if not is_predict else 1 settings( - batch_size=batch_size, - learning_rate=2e-3, - learning_method=AdamOptimizer() -) + batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer()) data = data_layer(name="word", size=len(word_dict)) embedding = embedding_layer(input=data, size=128) diff --git a/demo/quick_start/trainer_config.lr.py b/demo/quick_start/trainer_config.lr.py index c6059947f30b32975d72155150de095ade01aa9d..b9c9441baac28a8a8f6078065b75664819d6cd04 100644 --- a/demo/quick_start/trainer_config.lr.py +++ b/demo/quick_start/trainer_config.lr.py @@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict' # We need to use different process for training and prediction. # For training, the input data includes both word IDs and labels. # For prediction, the input data only includes word IDs.
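
The comments above describe a convention shared by all of these quick_start configs: a single dataprovider module exposes one entry point for training, which yields features plus a label, and one for prediction, which yields features only. For reference while reading this patch, a minimal sketch of that pattern follows; it is illustrative only, written in the style of the dataprovider_emb module the configs above point at, and is not part of this change:

from paddle.trainer.PyDataProvider2 import *

UNK_IDX = 0  # id reserved for out-of-vocabulary words (assumed convention)

def train_hook(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [
        integer_value_sequence(len(dictionary)),  # word ids
        integer_value(2)  # label: 0 or 1
    ]

def predict_hook(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [integer_value_sequence(len(dictionary))]

@provider(init_hook=train_hook)
def process(settings, file_name):
    # Training samples carry both the word ids and the label.
    with open(file_name, 'r') as f:
        for line in f:
            label, comment = line.strip().split('\t')
            words = comment.split()
            yield [settings.word_dict.get(w, UNK_IDX) for w in words], int(label)

@provider(init_hook=predict_hook)
def process_predict(settings, file_name):
    # Prediction samples carry the word ids only.
    with open(file_name, 'r') as f:
        for line in f:
            words = line.strip().split()
            yield [settings.word_dict.get(w, UNK_IDX) for w in words]

The bag-of-words provider used by trainer_config.lr.py differs only in its input_types; the train/predict split is the same.
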
-define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider_bow", - obj=process, - args={"dictionary": word_dict}) +define_py_data_sources2( + train_list=trn, + test_list=tst, + module="dataprovider_bow", + obj=process, + args={"dictionary": word_dict}) batch_size = 128 if not is_predict else 1 settings( @@ -44,8 +45,7 @@ settings( learning_rate=2e-3, learning_method=AdamOptimizer(), regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25 -) + gradient_clipping_threshold=25) # Define the data for text features. The size of the data layer is the number # of words in the dictionary. diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py index b412a9cbd914dc7abd70b93bbe250759552ee071..8821e02d9bd4a0d06b8afa99df8e0fac3e2fcefe 100644 --- a/demo/quick_start/trainer_config.lstm.py +++ b/demo/quick_start/trainer_config.lstm.py @@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False) trn = 'data/train.list' if not is_predict else None tst = 'data/test.list' if not is_predict else 'data/pred.list' process = 'process' if not is_predict else 'process_predict' -define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider_emb", - obj=process, - args={"dictionary": word_dict}) +define_py_data_sources2( + train_list=trn, + test_list=tst, + module="dataprovider_emb", + obj=process, + args={"dictionary": word_dict}) batch_size = 128 if not is_predict else 1 settings( @@ -39,17 +40,14 @@ settings( learning_rate=2e-3, learning_method=AdamOptimizer(), regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25 -) - + gradient_clipping_threshold=25) data = data_layer(name="word", size=len(word_dict)) emb = embedding_layer(input=data, size=128) -lstm = simple_lstm(input=emb, size=128, - lstm_cell_attr=ExtraAttr(drop_rate=0.25)) +lstm = simple_lstm( + input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25)) lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling()) -output = fc_layer(input=lstm_max, size=2, - act=SoftmaxActivation()) +output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation()) if is_predict: maxid = maxid_layer(output) outputs([maxid, output]) diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py index a5f00b3ef9ca00b42b8e31ddd6cfeca3580152b0..613e36b496e47edbc0eabd8f15a0abdcb50f6424 100755 --- a/demo/recommendation/common_utils.py +++ b/demo/recommendation/common_utils.py @@ -21,8 +21,9 @@ def meta_to_header(meta, name): yield integer_value(each_meta['max']) elif each_meta['type'] == 'embedding': is_seq = each_meta['seq'] == 'sequence' - yield integer_value(len(each_meta['dict']), - seq_type=SequenceType.SEQUENCE if is_seq - else SequenceType.NO_SEQUENCE) + yield integer_value( + len(each_meta['dict']), + seq_type=SequenceType.SEQUENCE + if is_seq else SequenceType.NO_SEQUENCE) elif each_meta['type'] == 'one_hot_dense': yield dense_vector(len(each_meta['dict'])) diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py index 29f38082693ad890ac4dfa302399663af8dbd27b..fa605458300f81da6772d88cfbad413e4dcf97fe 100644 --- a/demo/recommendation/data/config_generator.py +++ b/demo/recommendation/data/config_generator.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """ config_generator.py @@ -29,10 +28,7 @@ import json import docopt import copy -DEFAULT_FILE = { - "type": "split", - "delimiter": "," -} +DEFAULT_FILE = {"type": "split", "delimiter": ","} DEFAULT_FIELD = { "id": { @@ -107,19 +103,16 @@ def main(filename, fmt): field = copy.deepcopy(DEFAULT_FIELD[field_key]) field['pos'] = pos fields.append(field) - obj[k] = { - "file": file_dict, - "fields": fields - } - meta = { - "meta": obj - } + obj[k] = {"file": file_dict, "fields": fields} + meta = {"meta": obj} # print meta if fmt == 'json': + def formatter(x): import json return json.dumps(x, indent=2) elif fmt == 'yaml': + def formatter(x): import yaml return yaml.safe_dump(x, default_flow_style=False) diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py index 8d1a33d02aea112e51f1d43bedc06fdcee1186f5..593c863670d5eb5d684adf643ff745f3914b656b 100644 --- a/demo/recommendation/data/meta_generator.py +++ b/demo/recommendation/data/meta_generator.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Preprocess Movielens dataset, to get movie/user object. @@ -66,8 +65,8 @@ class SortedIDGenerator(object): self.__key_set__.add(key) def finish_scan(self, compare=None, key=None, reverse=False): - self.__key_set__ = sorted(list(self.__key_set__), cmp=compare, - key=key, reverse=reverse) + self.__key_set__ = sorted( + list(self.__key_set__), cmp=compare, key=key, reverse=reverse) self.dict = dict() for idx, each_key in enumerate(self.__key_set__): self.dict[each_key] = idx @@ -207,11 +206,10 @@ class EmbeddingFieldParser(object): self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( self.seq_type == EmbeddingFieldParser.SEQUENCE) elif config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict( - config['dict'].get('delimiter', ',')) + self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ',')) elif config['dict']['type'] == 'whole_content': - self.dict = EmbeddingFieldParser.WholeContentDict( - config['dict']['sort']) + self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][ + 'sort']) else: print config assert False @@ -333,8 +331,8 @@ class ContentExtractorFactory(object): return PositionContentExtractor(config['pos']) else: extra_args = config['regex'] - return RegexPositionContentExtractor(pos=config['pos'], - **extra_args) + return RegexPositionContentExtractor( + pos=config['pos'], **extra_args) class MetaFile(object): @@ -364,9 +362,10 @@ class MetaFile(object): metas = map(lambda x: x.meta_field(), field_parsers) # print metas - key_index = filter(lambda x: x is not None, map( - lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] - else None, enumerate(metas)))[0] + key_index = filter( + lambda x: x is not None, + map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None, + enumerate(metas)))[0] key_map = [] for i in range(min(key_index, len(metas))): @@ -374,12 +373,7 @@ class MetaFile(object): for i in range(key_index + 1, len(metas)): key_map.append(i) - obj = { - '__meta__': { - 'raw_meta': metas, - 'feature_map': key_map - } - } + obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}} for each_block in reader.read(): idx = field_parsers[key_index].parse(each_block) diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py index 
ff1f7fab7befdb5bdfa39fd0f1753e6804e82d8f..8dd0cbd32af6074439e98dac024c5fed76cd52b2 100644 --- a/demo/recommendation/data/split.py +++ b/demo/recommendation/data/split.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Separate movielens 1m dataset to train/test file. diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py index 454467f40b44bb526d143934c4a7350d41e54c0e..ff3932be03f1e4a1fc1d0bdb189ab7fe1fbbeca0 100755 --- a/demo/recommendation/dataprovider.py +++ b/demo/recommendation/dataprovider.py @@ -15,6 +15,7 @@ from paddle.trainer.PyDataProvider2 import * import common_utils # parse + def hook(settings, meta, **kwargs): """ Init hook is invoked before process data. It will set obj.slots and store @@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs): settings.input_types = headers settings.meta = meta + @provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) def process(settings, filename): with open(filename, 'r') as f: diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py index f8044a3195ec25bc2fa7c9079e4977f971059352..e2a202cfd1a476046d7e1d1896b87d72c4906ff2 100755 --- a/demo/recommendation/prediction.py +++ b/demo/recommendation/prediction.py @@ -28,7 +28,8 @@ if __name__ == '__main__': model_path = sys.argv[1] swig_paddle.initPaddle('--use_gpu=0') conf = parse_config("trainer_config.py", "is_predict=1") - network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) + network = swig_paddle.GradientMachine.createFromConfigProto( + conf.model_config) assert isinstance(network, swig_paddle.GradientMachine) network.loadParameters(model_path) with open('./data/meta.bin', 'rb') as f: @@ -39,11 +40,12 @@ if __name__ == '__main__': while True: movie_id = int(raw_input("Input movie_id: ")) user_id = int(raw_input("Input user_id: ")) - movie_meta = meta['movie'][movie_id] # Query Data From Meta. + movie_meta = meta['movie'][movie_id] # Query Data From Meta. 
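# (Editor's note, illustrative and not part of prediction.py: the network's
# cos_sim output lies in [-5, 5] -- the trainer_config helper scales cosine
# similarity by 5 by default -- so the "( ... + 5) / 2" expression below
# maps it onto a 0-5 rating scale.)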
user_meta = meta['user'][user_id] data = [movie_id - 1] data.extend(movie_meta) data.append(user_id - 1) data.extend(user_meta) - print "Prediction Score is %.2f" % ((network.forwardTest( - cvt.convert([data]))[0]['value'][0][0] + 5) / 2) + print "Prediction Score is %.2f" % ( + (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5) + / 2) diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py index 624c22ec969dc98808863ad53573b9633f1791ac..cec340b0b65a841029a1c0538d9881bb38f026ff 100755 --- a/demo/recommendation/trainer_config.py +++ b/demo/recommendation/trainer_config.py @@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f: # load meta file meta = pickle.load(f) -settings(batch_size=1600, learning_rate=1e-3, - learning_method=RMSPropOptimizer()) +settings( + batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer()) def construct_feature(name): @@ -59,11 +59,10 @@ def construct_feature(name): slot_name = each_meta.get('name', '%s_id' % name) if type_name == 'id': slot_dim = each_meta['max'] - embedding = embedding_layer(input=data_layer(slot_name, - size=slot_dim), - size=256) - fusion.append(fc_layer(input=embedding, - size=256)) + embedding = embedding_layer( + input=data_layer( + slot_name, size=slot_dim), size=256) + fusion.append(fc_layer(input=embedding, size=256)) elif type_name == 'embedding': is_seq = each_meta['seq'] == 'sequence' slot_dim = len(each_meta['dict']) @@ -71,17 +70,14 @@ def construct_feature(name): embedding = embedding_layer(input=din, size=256) if is_seq: fusion.append( - text_conv_pool(input=embedding, context_len=5, - hidden_size=256)) + text_conv_pool( + input=embedding, context_len=5, hidden_size=256)) else: - fusion.append(fc_layer(input=embedding, - size=256)) + fusion.append(fc_layer(input=embedding, size=256)) elif type_name == 'one_hot_dense': slot_dim = len(each_meta['dict']) - hidden = fc_layer(input=data_layer(slot_name, slot_dim), - size=256) - fusion.append(fc_layer(input=hidden, - size=256)) + hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256) + fusion.append(fc_layer(input=hidden, size=256)) return fc_layer(name="%s_fusion" % name, input=fusion, size=256) @@ -90,10 +86,16 @@ movie_feature = construct_feature("movie") user_feature = construct_feature("user") similarity = cos_sim(a=movie_feature, b=user_feature) if not is_predict: - outputs(regression_cost(input=similarity, - label=data_layer('rating', size=1))) - - define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider', - obj='process', args={'meta': meta}) + outputs( + regression_cost( + input=similarity, label=data_layer( + 'rating', size=1))) + + define_py_data_sources2( + 'data/train.list', + 'data/test.list', + module='dataprovider', + obj='process', + args={'meta': meta}) else: outputs(similarity) diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py index 2ef25c42c1794c410fe85fd497a6ed9d2295dca9..5c003584a52d459f13b7942ebe3a7147ac58a42f 100644 --- a/demo/semantic_role_labeling/dataprovider.py +++ b/demo/semantic_role_labeling/dataprovider.py @@ -26,9 +26,9 @@ def hook(settings, word_dict, label_dict, **kwargs): integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(2), - integer_value_sequence(len(label_dict))] + integer_value_sequence(len(word_dict)), integer_value_sequence(2), + 
integer_value_sequence(len(label_dict)) + ] @provider(init_hook=hook) diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py index 364460afbe31caf42cd4f0836eba75e444b3f5b8..e3f6edad6972112ed04e173a9b714e3fec13d402 100644 --- a/demo/semantic_role_labeling/db_lstm.py +++ b/demo/semantic_role_labeling/db_lstm.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - import math import os import sys @@ -42,7 +41,7 @@ if not is_predict: label_dict[w] = i if is_test: - train_list_file = None + train_list_file = None #define data provider define_py_data_sources2( diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py index 9a27112828e449174e3da79dc7db9fed20bfed6f..f051d4175cf6fff43bd7f84b457ab9dd12405a15 100644 --- a/demo/semantic_role_labeling/predict.py +++ b/demo/semantic_role_labeling/predict.py @@ -41,22 +41,16 @@ class Prediction(): len_dict = len(self.dict) len_label = len(self.labels) - conf = parse_config( - train_conf, - 'dict_len=' + str(len_dict) + - ',label_len=' + str(len_label) + - ',is_predict=True') + conf = parse_config(train_conf, 'dict_len=' + str(len_dict) + + ',label_len=' + str(len_label) + ',is_predict=True') self.network = swig_paddle.GradientMachine.createFromConfigProto( conf.model_config) self.network.loadParameters(model_dir) slots = [ - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(2) + integer_value_sequence(len_dict), integer_value_sequence(len_dict), + integer_value_sequence(len_dict), integer_value_sequence(len_dict), + integer_value_sequence(len_dict), integer_value_sequence(2) ] self.converter = DataProviderConverter(slots) @@ -110,8 +104,8 @@ class Prediction(): len_sen = len(sen.split()) line_labels = lab[index:index + len_sen] index += len_sen - fout.write(sen + '\t' + ' '.join([self.labels_reverse[ - i] for i in line_labels]) + '\n') + fout.write(sen + '\t' + ' '.join( + [self.labels_reverse[i] for i in line_labels]) + '\n') def option_parser(): diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py index 9a9fd81f030cb1d2a10a5000fd1d12810d12112b..53e3d1d20df92b8815347bd8937064871f326b3f 100755 --- a/demo/sentiment/dataprovider.py +++ b/demo/sentiment/dataprovider.py @@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import * def hook(settings, dictionary, **kwargs): settings.word_dict = dictionary settings.input_types = [ - integer_value_sequence(len(settings.word_dict)), - integer_value(2)] + integer_value_sequence(len(settings.word_dict)), integer_value(2) + ] settings.logger.info('dict len : %d' % (len(settings.word_dict))) @@ -29,6 +29,7 @@ def process(settings, file_name): label, comment = line.strip().split('\t\t') label = int(label) words = comment.split() - word_slot = [settings.word_dict[w] for w in words if w in - settings.word_dict] + word_slot = [ + settings.word_dict[w] for w in words if w in settings.word_dict + ] yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py index 7d0baeabbba68b2a160463364d05cd865bf0314f..bc0f6f31264294034ed38309f7fda370865b2845 100755 --- a/demo/sentiment/predict.py +++ b/demo/sentiment/predict.py @@ -18,14 +18,14 @@ from optparse import OptionParser from py_paddle import swig_paddle, DataProviderConverter from paddle.trainer.PyDataProvider2 import 
integer_value_sequence from paddle.trainer.config_parser import parse_config - """ Usage: run following command to show help message. python predict.py -h """ + class SentimentPrediction(): - def __init__(self, train_conf, dict_file, model_dir=None, label_file = None): + def __init__(self, train_conf, dict_file, model_dir=None, label_file=None): """ train_conf: trainer configure. dict_file: word dictionary file name. @@ -44,7 +44,8 @@ class SentimentPrediction(): self.load_label(label_file) conf = parse_config(train_conf, "is_predict=1") - self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) + self.network = swig_paddle.GradientMachine.createFromConfigProto( + conf.model_config) self.network.loadParameters(self.model_dir) input_types = [integer_value_sequence(self.dict_dim)] self.converter = DataProviderConverter(input_types) @@ -61,7 +62,7 @@ class SentimentPrediction(): """ Load label. """ - self.label={} + self.label = {} for v in open(label_file, 'r'): self.label[int(v.split('\t')[1])] = v.split('\t')[0] @@ -72,7 +73,9 @@ class SentimentPrediction(): with open(data_file, 'r') as fdata: for line in fdata: words = line.strip().split() - word_slot = [self.word_dict[w] for w in words if w in self.word_dict] + word_slot = [ + self.word_dict[w] for w in words if w in self.word_dict + ] if not word_slot: print "all words are not in dictionary: %s", line continue @@ -89,25 +92,48 @@ class SentimentPrediction(): if self.label is None: print("%s: predicting label is %d" % (data_file, lab[0][0])) else: - print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]])) + print("%s: predicting label is %s" % + (data_file, self.label[lab[0][0]])) + def option_parser(): usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option("-n", "--tconf", action="store", - dest="train_conf", help="network config") - parser.add_option("-d", "--dict", action="store", - dest="dict_file",help="dictionary file") - parser.add_option("-b", "--label", action="store", - dest="label", default=None, - help="dictionary file") - parser.add_option("-i", "--data", action="store", - dest="data", help="data file to predict") - parser.add_option("-w", "--model", action="store", - dest="model_path", default=None, - help="model path") + parser.add_option( + "-n", + "--tconf", + action="store", + dest="train_conf", + help="network config") + parser.add_option( + "-d", + "--dict", + action="store", + dest="dict_file", + help="dictionary file") + parser.add_option( + "-b", + "--label", + action="store", + dest="label", + default=None, + help="dictionary file") + parser.add_option( + "-i", + "--data", + action="store", + dest="data", + help="data file to predict") + parser.add_option( + "-w", + "--model", + action="store", + dest="model_path", + default=None, + help="model path") return parser.parse_args() + def main(): options, args = option_parser() train_conf = options.train_conf @@ -119,5 +145,6 @@ def main(): predict = SentimentPrediction(train_conf, dict_file, model_path, label) predict.predict(data) + if __name__ == '__main__': main() diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py index 49b53d500a1bf816bde9c9675b251be8e9a68ae9..7146e95d751c4de649e204fab724085994dfa4d3 100755 --- a/demo/sentiment/preprocess.py +++ b/demo/sentiment/preprocess.py @@ -22,13 +22,13 @@ from os.path import join as join_path from optparse import OptionParser from 
paddle.utils.preprocess_util import * - """ Usage: run following command to show help message. python preprocess.py -h """ -def save_dict(dict, filename, is_reverse = True): + +def save_dict(dict, filename, is_reverse=True): """ Save dictionary into file. dict: input dictionary. @@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True): f = open(filename, 'w') for k, v in sorted(dict.items(), key=operator.itemgetter(1),\ reverse=is_reverse): - f.write('%s\t%s\n'%(k, v)) + f.write('%s\t%s\n' % (k, v)) f.close() + def tokenize(sentences): """ Use tokenizer.perl to tokenize input sentences. @@ -58,6 +59,7 @@ def tokenize(sentences): toks = tok_text.split('\n')[:-1] return toks + def read_lines(path): """ path: String, file path. @@ -71,12 +73,17 @@ def read_lines(path): seqs.append(line) return seqs + class SentimentDataSetCreate(): """ A class to process data for sentiment analysis task. """ - def __init__(self, data_path, output_path, - use_okenizer = True, multi_lines = False): + + def __init__(self, + data_path, + output_path, + use_okenizer=True, + multi_lines=False): """ data_path: string, training and testing dataset path output_path: string, output path, store processed dataset @@ -164,23 +171,17 @@ class SentimentDataSetCreate(): # Preprocess train data. train_data, train_lab_set = self.data_list(self.train_dir) print "processing train set..." - file_lists = self.save_data(train_data, - "train", - self.batch_size, - True, - True) + file_lists = self.save_data(train_data, "train", self.batch_size, True, + True) save_list(file_lists, self.train_list) # If a test data path exists, preprocess test data. if os.path.exists(self.test_dir): test_data, test_lab_set = self.data_list(self.test_dir) - assert(train_lab_set == test_lab_set) + assert (train_lab_set == test_lab_set) print "processing test set..." - file_lists = self.save_data(test_data, - "test", - self.batch_size, - False, - self.dict_with_test) + file_lists = self.save_data(test_data, "test", self.batch_size, + False, self.dict_with_test) save_list(file_lists, self.test_list) # save labels set.
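
The two save_data calls above shard each split into numbered batch files; the hunks that follow only re-wrap that logic. Its core reduces to the sketch below, where batch_file_names is a hypothetical helper written here for illustration, mirroring the "%s_part_%03d" naming save_data actually uses:

import math
from os.path import join as join_path

def batch_file_names(output_path, prefix, num_samples, batch_size=50000):
    # One output file per batch: <prefix>_part_000, <prefix>_part_001, ...
    num_batches = int(math.ceil(num_samples / float(batch_size)))
    return [
        join_path(output_path, "%s_part_%03d" % (prefix, i))
        for i in range(num_batches)
    ]

For example, 125000 training samples with the default batch_size of 50000 give train_part_000 through train_part_002.
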
@@ -191,7 +192,9 @@ class SentimentDataSetCreate(): save_dict(self.word_count, self.dict_file, True) self.dict_size = len(self.word_count) - def save_data(self, data, prefix = "", + def save_data(self, + data, + prefix="", batch_size=50000, is_shuffle=False, build_dict=False): @@ -205,7 +208,8 @@ return: list of batch names """ if is_shuffle and self.multi_lines: - return self.save_data_multi_lines(data, prefix, batch_size, build_dict) + return self.save_data_multi_lines(data, prefix, batch_size, + build_dict) if is_shuffle: random.shuffle(data) @@ -213,7 +217,7 @@ batch_names = [] for i in range(num_batches): batch_name = join_path(self.output_path, - "%s_part_%03d" %(prefix, i)) + "%s_part_%03d" % (prefix, i)) begin = i * batch_size end = min((i + 1) * batch_size, len(data)) # read a batch of data @@ -246,7 +250,9 @@ data_list = tokenize(data_list) return label_list, data_list - def save_data_multi_lines(self, data, prefix = "", + def save_data_multi_lines(self, + data, + prefix="", batch_size=50000, build_dict=False): """ @@ -274,14 +280,14 @@ self.create_dict(data_list) length = len(label_list) - perm_list = np.array([ i for i in xrange(length) ]) + perm_list = np.array([i for i in xrange(length)]) random.shuffle(perm_list) num_batches = int(math.ceil(length / float(batch_size))) batch_names = [] for i in range(num_batches): batch_name = join_path(self.output_path, - "%s_part_%03d" %(prefix, i)) + "%s_part_%03d" % (prefix, i)) begin = i * batch_size end = min((i + 1) * batch_size, length) sub_label = [label_list[perm_list[i]] for i in range(begin, end)] @@ -304,35 +310,50 @@ f.write('%s\t\t%s\n' % (lab, seq)) f.close() + def option_parser(): parser = OptionParser(usage="usage: python preprocess.py "\ "-i data_dir [options]") - parser.add_option("-i", "--data", action="store", - dest="input", help="Input data directory.") - parser.add_option("-o", "--output", action="store", - dest="output", default=None, - help="Output directory.") - parser.add_option("-t", "--tokenizer", action="store", - dest="use_tokenizer", default=True, - help="Whether to use tokenizer.") + parser.add_option( + "-i", + "--data", + action="store", + dest="input", + help="Input data directory.") + parser.add_option( + "-o", + "--output", + action="store", + dest="output", + default=None, + help="Output directory.") + parser.add_option( + "-t", + "--tokenizer", + action="store", + dest="use_tokenizer", + default=True, + help="Whether to use tokenizer.") parser.add_option("-m", "--multi_lines", action="store", dest="multi_lines", default=False, help="If input text files have multiple lines and they "\ "need to be shuffled, you should set -m True.") return parser.parse_args() + def main(): options, args = option_parser() - data_dir=options.input - output_dir=options.output - use_tokenizer=options.use_tokenizer - multi_lines=options.multi_lines + data_dir = options.input + output_dir = options.output + use_tokenizer = options.use_tokenizer + multi_lines = options.multi_lines if output_dir is None: outname = os.path.basename(options.input) output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname) - data_creator = SentimentDataSetCreate(data_dir, output_dir, - use_tokenizer, multi_lines) + data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer, + multi_lines) data_creator.create_dataset() + if __name__ == '__main__': main() diff --git
a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py index 31e585edcaa111898c950ad016d3996fae15a7db..ff6a3624a404cb52d5d7ac0934fedba0d489dc22 100644 --- a/demo/sentiment/sentiment_net.py +++ b/demo/sentiment/sentiment_net.py @@ -47,10 +47,12 @@ def sentiment_data(data_dir=None, for i, line in enumerate(open(dict_file, 'r')): word_dict[line.split('\t')[0]] = i - define_py_data_sources2(train_list, test_list, - module="dataprovider", - obj="process", - args={'dictionary': word_dict}) + define_py_data_sources2( + train_list, + test_list, + module="dataprovider", + obj="process", + args={'dictionary': word_dict}) return dict_dim, class_dim @@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim, emb = embedding_layer(input=data, size=emb_dim) bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim) dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5) - output = fc_layer(input=dropout, size=class_dim, - act=SoftmaxActivation()) + output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation()) if not is_predict: lbl = data_layer("label", 1) @@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim, data = data_layer("word", input_dim) emb = embedding_layer(input=data, size=emb_dim) - fc1 = fc_layer(input=emb, size=hid_dim, act=linear, - bias_attr=bias_attr) - lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr, - layer_attr=layer_attr) + fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) + lstm1 = lstmemory( + input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) inputs = [fc1, lstm1] for i in range(2, stacked_num + 1): - fc = fc_layer(input=inputs, size=hid_dim, act=linear, - param_attr=para_attr, bias_attr=bias_attr) - lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu, - bias_attr=bias_attr, layer_attr=layer_attr) + fc = fc_layer( + input=inputs, + size=hid_dim, + act=linear, + param_attr=para_attr, + bias_attr=bias_attr) + lstm = lstmemory( + input=fc, + reverse=(i % 2) == 0, + act=relu, + bias_attr=bias_attr, + layer_attr=layer_attr) inputs = [fc, lstm] fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) - output = fc_layer(input=[fc_last, lstm_last], size=class_dim, - act=SoftmaxActivation(), - bias_attr=bias_attr, param_attr=para_attr) + output = fc_layer( + input=[fc_last, lstm_last], + size=class_dim, + act=SoftmaxActivation(), + bias_attr=bias_attr, + param_attr=para_attr) if is_predict: outputs(output) else: - outputs( - classification_cost(input=output, label=data_layer('label', 1))) + outputs(classification_cost(input=output, label=data_layer('label', 1))) diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py index db24182a8d7359786bd1f3b2083892cf846605d1..894070e7c97dcb29e8c0df31437a374be5f5d691 100644 --- a/demo/sentiment/trainer_config.py +++ b/demo/sentiment/trainer_config.py @@ -20,20 +20,19 @@ is_test = get_config_arg('is_test', bool, False) # whether this config is used for prediction is_predict = get_config_arg('is_predict', bool, False) -data_dir = "./data/pre-imdb" +data_dir = "./data/pre-imdb" dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) ################## Algorithm Config ##################### settings( - batch_size=128, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25 -) + batch_size=128, + learning_rate=2e-3, + learning_method=AdamOptimizer(), + regularization=L2Regularization(8e-4), + 
gradient_clipping_threshold=25) #################### Network Config ###################### -stacked_lstm_net(dict_dim, class_dim=class_dim, - stacked_num=3, is_predict=is_predict) +stacked_lstm_net( + dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict) # bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py index df19db109ed223c7515c3ebf2cb1918f41163930..c5da1b7685f47fda337921c7c60ac1497b9e48bb 100755 --- a/demo/seqToseq/dataprovider.py +++ b/demo/seqToseq/dataprovider.py @@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs): if settings.job_mode: settings.trg_dict = trg_dict settings.slots = [ - integer_value_sequence(len(settings.src_dict)), - integer_value_sequence(len(settings.trg_dict)), + integer_value_sequence(len(settings.src_dict)), + integer_value_sequence(len(settings.trg_dict)), integer_value_sequence(len(settings.trg_dict)) ] settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) else: settings.slots = [ - integer_value_sequence(len(settings.src_dict)), + integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(open(file_list[0], "r").readlines())) ] @@ -62,8 +62,7 @@ def process(settings, file_name): if settings.job_mode: trg_seq = line_split[1] # one target sequence trg_words = trg_seq.split() - trg_ids = [settings.trg_dict.get(w, UNK_IDX) - for w in trg_words] + trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words] # remove sequence whose length > 80 in training mode if len(src_ids) > 80 or len(trg_ids) > 80: diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py index 5efb17a664b9a2525972c29b9b5700b483b8c07e..bd1c51b1514b790ec385d48f49197b3e0285e736 100755 --- a/demo/seqToseq/preprocess.py +++ b/demo/seqToseq/preprocess.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Example: python preprocess.py -i INPUT [-d DICTSIZE] [-m] @@ -24,12 +23,13 @@ Options: -m --mergeDict merge source and target dictionary """ import os -import sys +import sys import string from optparse import OptionParser from paddle.utils.preprocess_util import save_list, DatasetCreater + class SeqToSeqDatasetCreater(DatasetCreater): """ A class to process data for sequence to sequence application. @@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater): if not os.path.exists(output): os.system(cmd + '> ' + output) - def build_dict(self, file_path, dict_path, dict_size = -1): + def build_dict(self, file_path, dict_path, dict_size=-1): """ Create the dictionary for the file, Note that 1. 
Valid characters include all printable characters @@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater): for word in words: if word not in dictory: dictory[word] = 1 - else: + else: dictory[word] += 1 output = open(dict_path, "w+") output.write('\n\n\n') count = 3 - for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True): + for key, value in sorted( + dictory.items(), key=lambda d: d[1], reverse=True): output.write(key + "\n") count += 1 if count == dict_size: break self.dict_size = count - - def create_dataset(self, dict_size = -1, mergeDict = False, - suffixes = ['.src', '.trg']): + + def create_dataset(self, + dict_size=-1, + mergeDict=False, + suffixes=['.src', '.trg']): """ Create seqToseq dataset """ @@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater): # check that the dataset is a parallel corpus suffix_len = len(suffixes[0]) for dataset in dataset_list: - file_list = os.listdir(dataset) - if len(file_list) % 2 == 1: - raise RuntimeError("dataset should be parallel corpora") - file_list.sort() - for i in range(0, len(file_list), 2): - if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: - raise RuntimeError("source and target file name should be equal") + file_list = os.listdir(dataset) + if len(file_list) % 2 == 1: + raise RuntimeError("dataset should be parallel corpora") + file_list.sort() + for i in range(0, len(file_list), 2): + if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: + raise RuntimeError( + "source and target file name should be equal") # cat all the files with the same suffix in dataset for suffix in suffixes: @@ -155,16 +159,18 @@ list = ['train.list', 'test.list', 'gen.list'] for dataset in dataset_list: outname = os.path.basename(dataset) - self.concat_file(dataset, outname + suffixes[0], + self.concat_file(dataset, outname + suffixes[0], outname + suffixes[1], dir_list[id], outname) - save_list([os.path.join(dir_list[id], outname)], + save_list([os.path.join(dir_list[id], outname)], os.path.join(self.output_path, list[id])) id += 1 # build dictionary for train data dict = ['src.dict', 'trg.dict'] - dict_path = [os.path.join(self.output_path, dict[0]), - os.path.join(self.output_path, dict[1])] + dict_path = [ + os.path.join(self.output_path, dict[0]), + os.path.join(self.output_path, dict[1]) + ] if mergeDict: outname = os.path.join(train_dir, train_dataset.split('/')[-1]) print 'build src dictionary for train data' @@ -173,22 +179,30 @@ os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) else: outname = os.path.join(train_dataset, self.train_dir_name) - for id in range(0,2): + for id in range(0, 2): suffix = suffixes[id] print 'build ' + suffix[1:] + ' dictionary for train data' self.build_dict(outname + suffix, dict_path[id], dict_size) print 'dictionary size is', self.dict_size + def main(): usage = "usage: \n" \ "python %prog -i INPUT [-d DICTSIZE] [-m]" parser = OptionParser(usage) - parser.add_option("-i", action="store", dest="input", - help="input original dataset path") - parser.add_option("-d", action="store", dest="dictsize", - help="specified word count of dictionary") - parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict", - help="merge source and target dictionary") + parser.add_option( + "-i", action="store", dest="input", help="input original dataset path") + parser.add_option( + "-d", + action="store", + dest="dictsize", + help="specified word count of
dictionary") + parser.add_option( + "-m", + "--mergeDict", + action="store_true", + dest="mergeDict", + help="merge source and target dictionary") (options, args) = parser.parse_args() if options.input[-1] == os.path.sep: options.input = options.input[:-1] @@ -200,5 +214,6 @@ def main(): data_creator = SeqToSeqDatasetCreater(options.input, output_path) data_creator.create_dataset(dictsize, options.mergeDict) + if __name__ == "__main__": - main(); + main() diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py index edd6ad3f739b6cefc24d235be55c7a8f541e1ab7..ad5e3339c1461de06732eb62aca9e8323eea707b 100644 --- a/demo/seqToseq/seqToseq_net.py +++ b/demo/seqToseq/seqToseq_net.py @@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir, trg_dict = None else: train_list = os.path.join(data_dir, train_list) - test_list = os.path.join(data_dir,test_list) + test_list = os.path.join(data_dir, test_list) - define_py_data_sources2(train_list, test_list, - module = "dataprovider", - obj = "process", - args = {"src_dict": src_dict, - "trg_dict": trg_dict}) + define_py_data_sources2( + train_list, + test_list, + module="dataprovider", + obj="process", + args={"src_dict": src_dict, + "trg_dict": trg_dict}) - return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict, - "gen_result": gen_result} + return { + "src_dict_path": src_lang_dict, + "trg_dict_path": trg_lang_dict, + "gen_result": gen_result + } def gru_encoder_decoder(data_conf, @@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf, size=word_vector_dim, param_attr=ParamAttr(name='_source_language_embedding')) src_forward = simple_gru(input=src_embedding, size=encoder_size) - src_backward = simple_gru(input=src_embedding, - size=encoder_size, - reverse=True) + src_backward = simple_gru( + input=src_embedding, size=encoder_size, reverse=True) encoded_vector = concat_layer(input=[src_forward, src_backward]) with mixed_layer(size=decoder_size) as encoded_proj: encoded_proj += full_matrix_projection(input=encoded_vector) backward_first = first_seq(input=src_backward) - with mixed_layer(size=decoder_size, - act=TanhActivation(), ) as decoder_boot: + with mixed_layer( + size=decoder_size, + act=TanhActivation(), ) as decoder_boot: decoder_boot += full_matrix_projection(input=backward_first) def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - decoder_mem = memory(name='gru_decoder', - size=decoder_size, - boot_layer=decoder_boot) + decoder_mem = memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - context = simple_attention(encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem, ) + context = simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem, ) with mixed_layer(size=decoder_size * 3) as decoder_inputs: decoder_inputs += full_matrix_projection(input=context) decoder_inputs += full_matrix_projection(input=current_word) - gru_step = gru_step_layer(name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) + gru_step = gru_step_layer( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) - with mixed_layer(size=target_dict_dim, - bias_attr=True, - act=SoftmaxActivation()) as out: + with mixed_layer( + size=target_dict_dim, bias_attr=True, + act=SoftmaxActivation()) as out: out += full_matrix_projection(input=gru_step) return out decoder_group_name = "decoder_group" - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - 
StaticInput(input=encoded_proj,is_seq=True)] + group_inputs = [ + StaticInput( + input=encoded_vector, is_seq=True), StaticInput( + input=encoded_proj, is_seq=True) + ] if not is_generating: trg_embedding = embedding_layer( - input=data_layer(name='target_language_word', - size=target_dict_dim), + input=data_layer( + name='target_language_word', size=target_dict_dim), size=word_vector_dim, param_attr=ParamAttr(name='_target_language_embedding')) group_inputs.append(trg_embedding) @@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf, # while encoded source sequence is accessed to as an unbounded memory. # Here, the StaticInput defines a read-only memory # for the recurrent_group. - decoder = recurrent_group(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) + decoder = recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) - lbl = data_layer(name='target_language_next_word', - size=target_dict_dim) + lbl = data_layer(name='target_language_next_word', size=target_dict_dim) cost = classification_cost(input=decoder, label=lbl) outputs(cost) else: @@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf, embedding_size=word_vector_dim) group_inputs.append(trg_embedding) - beam_gen = beam_search(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator(input=beam_gen, - id_input=data_layer(name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) + beam_gen = beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs, + bos_id=0, + eos_id=1, + beam_size=beam_size, + max_length=max_length) + + seqtext_printer_evaluator( + input=beam_gen, + id_input=data_layer( + name="sent_id", size=1), + dict_file=trg_dict_path, + result_file=gen_trans_file) outputs(beam_gen) diff --git a/demo/sequence_tagging/dataprovider.py b/demo/sequence_tagging/dataprovider.py index 6f412d6834be6d02397821215b1317353cd5df18..37dcb7aa17c0abd197ef2f3121bf8be6c54375c2 100644 --- a/demo/sequence_tagging/dataprovider.py +++ b/demo/sequence_tagging/dataprovider.py @@ -17,8 +17,7 @@ import gzip import logging logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', -) + format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', ) logger = logging.getLogger('paddle') logger.setLevel(logging.INFO) @@ -32,59 +31,58 @@ num_original_columns = 3 # [[-1,0], [0,0]] means previous token at column 0 and current token at # column 0 are combined as one feature. 
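# (Editor's illustration, not part of dataprovider.py: with the two input
# columns (word, POS tag), the pattern [[-1, 0], [0, 0]] applied at position
# i=1 of [['He', 'PRP'], ['reckons', 'VBZ']] joins the previous and current
# words into the single feature string 'He/reckons' via the '/'.join(...) in
# make_features below; positions before the start of the sequence fall back
# to boundary markers such as '#B1'.)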
patterns = [ - [[-2,0]], - [[-1,0]], - [[0,0]], - [[1,0]], - [[2,0]], - - [[-1,0], [0,0]], - [[0,0], [1,0]], - - [[-2,1]], - [[-1,1]], - [[0,1]], - [[1,1]], - [[2,1]], - [[-2,1], [-1,1]], - [[-1,1], [0,1]], - [[0,1], [1,1]], - [[1,1], [2,1]], - - [[-2,1], [-1,1], [0,1]], - [[-1,1], [0,1], [1,1]], - [[0,1], [1,1], [2,1]], + [[-2, 0]], + [[-1, 0]], + [[0, 0]], + [[1, 0]], + [[2, 0]], + [[-1, 0], [0, 0]], + [[0, 0], [1, 0]], + [[-2, 1]], + [[-1, 1]], + [[0, 1]], + [[1, 1]], + [[2, 1]], + [[-2, 1], [-1, 1]], + [[-1, 1], [0, 1]], + [[0, 1], [1, 1]], + [[1, 1], [2, 1]], + [[-2, 1], [-1, 1], [0, 1]], + [[-1, 1], [0, 1], [1, 1]], + [[0, 1], [1, 1], [2, 1]], ] dict_label = { - 'B-ADJP': 0, - 'I-ADJP': 1, - 'B-ADVP': 2, - 'I-ADVP': 3, - 'B-CONJP': 4, - 'I-CONJP': 5, - 'B-INTJ': 6, - 'I-INTJ': 7, - 'B-LST': 8, - 'I-LST': 9, - 'B-NP': 10, - 'I-NP': 11, - 'B-PP': 12, - 'I-PP': 13, - 'B-PRT': 14, - 'I-PRT': 15, - 'B-SBAR': 16, - 'I-SBAR': 17, - 'B-UCP': 18, - 'I-UCP': 19, - 'B-VP': 20, - 'I-VP': 21, - 'O': 22 + 'B-ADJP': 0, + 'I-ADJP': 1, + 'B-ADVP': 2, + 'I-ADVP': 3, + 'B-CONJP': 4, + 'I-CONJP': 5, + 'B-INTJ': 6, + 'I-INTJ': 7, + 'B-LST': 8, + 'I-LST': 9, + 'B-NP': 10, + 'I-NP': 11, + 'B-PP': 12, + 'I-PP': 13, + 'B-PRT': 14, + 'I-PRT': 15, + 'B-SBAR': 16, + 'I-SBAR': 17, + 'B-UCP': 18, + 'I-UCP': 19, + 'B-VP': 20, + 'I-VP': 21, + 'O': 22 } + def make_features(sequence): length = len(sequence) num_features = len(sequence[0]) + def get_features(pos): if pos < 0: return ['#B%s' % -pos] * num_features @@ -94,9 +92,10 @@ def make_features(sequence): for i in xrange(length): for pattern in patterns: - fname = '/'.join([get_features(i+pos)[f] for pos, f in pattern]) + fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern]) sequence[i].append(fname) + ''' Source file format: Each line is for one timestep. The features are separated by space. @@ -109,6 +108,8 @@ i-th column. return a list of dict for each column ''' + + def create_dictionaries(filename, cutoff, oov_policy): def add_to_dict(sequence, dicts): num_features = len(dicts) @@ -140,7 +141,6 @@ def create_dictionaries(filename, cutoff, oov_policy): features = line.split(' ') sequence.append(features) - for i in xrange(num_features): dct = dicts[i] n = 1 if oov_policy[i] == OOV_POLICY_USE else 0 @@ -151,7 +151,7 @@ def create_dictionaries(filename, cutoff, oov_policy): else: dct[k] = n n += 1 - + if oov_policy[i] == OOV_POLICY_USE: # placeholder so that len(dct) will be the number of features # including OOV @@ -187,12 +187,15 @@ def initializer(settings, **xargs): logger.info("feature size=%s" % dim) settings.input_types = input_types + ''' if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not existed in dicts[i] will be assigned to id 0. if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist in dicts[i]. 
''' + + @provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) def process(settings, filename): input_file = filename @@ -231,7 +234,7 @@ def process(settings, filename): logger.fatal("Unknown token: %s" % features[i]) else: vec.ids.append(dim + 0) - + dim += len(dicts[i]) sample[-1].append(vec) return sample @@ -255,4 +258,3 @@ def process(settings, filename): f.close() logger.info("num_sequences=%s" % num_sequences) - diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py index 2bd1a20bc52fc546dcd0a0874bc09433e7212152..64895742e1b8c0a11cbedee0b88e61b5b63b007f 100644 --- a/demo/sequence_tagging/linear_crf.py +++ b/demo/sequence_tagging/linear_crf.py @@ -16,11 +16,11 @@ from paddle.trainer_config_helpers import * import math -define_py_data_sources2(train_list="data/train.list", - test_list="data/test.list", - module="dataprovider", - obj="process") - +define_py_data_sources2( + train_list="data/train.list", + test_list="data/test.list", + module="dataprovider", + obj="process") batch_size = 1 settings( @@ -30,14 +30,15 @@ settings( average_window=0.5, learning_rate=1e-1, learning_rate_decay_a=1e-5, - learning_rate_decay_b=0.25, -) + learning_rate_decay_b=0.25, ) + +num_label_types = 23 -num_label_types=23 def get_simd_size(size): return int(math.ceil(float(size) / 8)) * 8 + # Currently, in order to use sparse_update=True, # the size has to be aligned. num_label_types = get_simd_size(num_label_types) @@ -45,40 +46,37 @@ num_label_types = get_simd_size(num_label_types) features = data_layer(name="features", size=76328) word = data_layer(name="word", size=6778) pos = data_layer(name="pos", size=44) -chunk = data_layer(name="chunk", - size=num_label_types) +chunk = data_layer(name="chunk", size=num_label_types) crf_input = fc_layer( input=features, size=num_label_types, act=LinearActivation(), bias_attr=False, - param_attr=ParamAttr(initial_std=0, sparse_update=True)) + param_attr=ParamAttr( + initial_std=0, sparse_update=True)) -crf=crf_layer( +crf = crf_layer( input=crf_input, label=chunk, - param_attr=ParamAttr(name="crfw", initial_std=0), -) + param_attr=ParamAttr( + name="crfw", initial_std=0), ) -crf_decoding=crf_decoding_layer( +crf_decoding = crf_decoding_layer( size=num_label_types, input=crf_input, label=chunk, - param_attr=ParamAttr(name="crfw"), -) + param_attr=ParamAttr(name="crfw"), ) sum_evaluator( name="error", - input=crf_decoding, -) + input=crf_decoding, ) chunk_evaluator( name="chunk_f1", - input =[crf_decoding, chunk], + input=[crf_decoding, chunk], chunk_scheme="IOB", - num_chunk_types=11, -) + num_chunk_types=11, ) inputs(word, pos, chunk, features) outputs(crf) diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py index fb157bf3ea7193bca2c8a281e1afaf4b5f1d7309..90d4bbdddfdb4e38b930d54a2bc865df9fac589c 100644 --- a/demo/sequence_tagging/rnn_crf.py +++ b/demo/sequence_tagging/rnn_crf.py @@ -16,10 +16,11 @@ from paddle.trainer_config_helpers import * import math -define_py_data_sources2(train_list="data/train.list", - test_list="data/test.list", - module="dataprovider", - obj="process") +define_py_data_sources2( + train_list="data/train.list", + test_list="data/test.list", + module="dataprovider", + obj="process") batch_size = 16 settings( @@ -27,29 +28,27 @@ settings( batch_size=batch_size, regularization=L2Regularization(batch_size * 1e-5), average_window=0.5, - learning_rate = 2e-3, - learning_rate_decay_a = 5e-7, - learning_rate_decay_b = 0.5, -) + learning_rate=2e-3, + learning_rate_decay_a=5e-7, 
+ learning_rate_decay_b=0.5, ) -word_dim=128 +word_dim = 128 hidden_dim = 128 with_rnn = True -initial_std=1/math.sqrt(hidden_dim) -param_attr=ParamAttr(initial_std=initial_std) -cpu_layer_attr=ExtraLayerAttribute(device=-1) +initial_std = 1 / math.sqrt(hidden_dim) +param_attr = ParamAttr(initial_std=initial_std) +cpu_layer_attr = ExtraLayerAttribute(device=-1) default_device(0) -num_label_types=23 +num_label_types = 23 features = data_layer(name="features", size=76328) word = data_layer(name="word", size=6778) pos = data_layer(name="pos", size=44) -chunk = data_layer(name="chunk", - size=num_label_types, - layer_attr=cpu_layer_attr) +chunk = data_layer( + name="chunk", size=num_label_types, layer_attr=cpu_layer_attr) emb = embedding_layer( input=word, size=word_dim, param_attr=ParamAttr(initial_std=0)) @@ -58,73 +57,64 @@ hidden1 = mixed_layer( size=hidden_dim, act=STanhActivation(), bias_attr=True, - input=[full_matrix_projection(emb), - table_projection(pos, param_attr=param_attr)] -) + input=[ + full_matrix_projection(emb), table_projection( + pos, param_attr=param_attr) + ]) if with_rnn: rnn1 = recurrent_layer( act=ReluActivation(), bias_attr=True, input=hidden1, - param_attr=ParamAttr(initial_std=0), - ) + param_attr=ParamAttr(initial_std=0), ) hidden2 = mixed_layer( size=hidden_dim, act=STanhActivation(), bias_attr=True, - input=[full_matrix_projection(hidden1) - ] + ([ - full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0)) - ] if with_rnn else []), -) + input=[full_matrix_projection(hidden1)] + + ([full_matrix_projection( + rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), ) if with_rnn: - rnn2=recurrent_layer( + rnn2 = recurrent_layer( reverse=True, act=ReluActivation(), bias_attr=True, input=hidden2, - param_attr=ParamAttr(initial_std=0), - ) + param_attr=ParamAttr(initial_std=0), ) crf_input = mixed_layer( size=num_label_types, bias_attr=False, - input=[ - full_matrix_projection(hidden2), - ] + ([ - full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0)) - ] if with_rnn else []), -) + input=[full_matrix_projection(hidden2), ] + + ([full_matrix_projection( + rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), ) crf = crf_layer( input=crf_input, label=chunk, - param_attr=ParamAttr(name="crfw", initial_std=0), - layer_attr=cpu_layer_attr, -) + param_attr=ParamAttr( + name="crfw", initial_std=0), + layer_attr=cpu_layer_attr, ) crf_decoding = crf_decoding_layer( size=num_label_types, input=crf_input, label=chunk, param_attr=ParamAttr(name="crfw"), - layer_attr=cpu_layer_attr, -) + layer_attr=cpu_layer_attr, ) sum_evaluator( name="error", - input=crf_decoding, -) + input=crf_decoding, ) chunk_evaluator( name="chunk_f1", - input =[crf_decoding, chunk], + input=[crf_decoding, chunk], chunk_scheme="IOB", - num_chunk_types=11, -) + num_chunk_types=11, ) inputs(word, pos, chunk, features) outputs(crf) diff --git a/doc/ui/predict/predict_sample.py b/doc/ui/predict/predict_sample.py index d55d2c730dece07f068b728d0a75f34c70b817bd..63e8b36d26057d4a87dabb8745de8e13efe2524f 100644 --- a/doc/ui/predict/predict_sample.py +++ b/doc/ui/predict/predict_sample.py @@ -16,82 +16,113 @@ from py_paddle import swig_paddle, DataProviderConverter from paddle.trainer.PyDataProvider2 import dense_vector from paddle.trainer.config_parser import parse_config -TEST_DATA = [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686, - 0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451, - 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.886275, - 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157, 0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0.670588, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.141176, - 0.992157, 0.992157, 0.611765, 0.054902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157, - 0.529412, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.603922, 0.992157, - 0.992157, 0.992157, 0.603922, 0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157, - 0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157, 0.992157, 0.992157, 0.992157, - 0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0, 0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098, 0.992157, - 0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157, 0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0.070588, 0.992157, 0.992157, 0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471, - 0, 0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, - 0.992157, 0.713725, 0, 0, 0, 0, 0.627451, 0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157, - 0.776471, 0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157, 0.968627, 0.168627, 0, 0, - 0, 0.423529, 0.992157, 0.992157, 0.364706, 0, 0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922, 0.466667, 0.992157, - 0.988235, 0.976471, 0.992157, 0.992157, 0.788235, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275, - 0.882353, 0.364706, 0, 0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392, - 0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569, 0, 0, 0, 0, 0, 0, 0, 0.105882, - 0.733333, 0.976471, 0.811765, 0.713725, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157, - 0.321569, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.968627, - 0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039, - 0.25098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], [ - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255, 0.333333, - 0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.027451, 0.223529, 0.776471, - 0.964706, 0.988235, 0.988235, 0.988235, 0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961, 0.87451, - 0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0.188235, 0.647059, 0.988235, 0.988235, 0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235, - 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157, 0.941176, - 0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0.039216, 0.639216, 0.933333, 0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137, - 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235, 0.992157, 0.988235, 0.815686, - 0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333, 0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0.211765, 0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0.698039, - 0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.890196, 0.988235, 0.988235, - 0.745098, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0.2, 0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0, 0, 0, 0, 0.447059, - 0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.988235, 0.988235, 0.988235, - 0.992157, 0.47451, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118, 0.082353, 0, 0, 0, 0, 0, 0, - 0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725, - 0.329412, 0.376471, 0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294, 0.219608, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235, 0.988235, 0.741176, 0.309804, 0, 0, 0, 0, - 0, 0, 0.529412, 0.988235, 0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157, - 0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157, 0.882353, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529, 0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235, - 0.988235, 0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627, 0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0]]] +TEST_DATA = [[[ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.215686, 0.533333, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.67451, 0.992157, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.070588, 0.886275, 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.192157, + 0.070588, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.670588, 0.992157, + 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.117647, 0.933333, 0.858824, 0.313725, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.090196, 0.858824, 0.992157, 0.831373, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0.141176, 0.992157, 0.992157, 0.611765, 0.054902, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.258824, 0.992157, 0.992157, 0.529412, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0.368627, 0.992157, 0.992157, 0.419608, 0.003922, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0.094118, 0.835294, 0.992157, 0.992157, 0.517647, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0.603922, 0.992157, 0.992157, 0.992157, 0.603922, + 0.545098, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0.447059, 0.992157, 0.992157, + 0.956863, 0.062745, 0, 0, 0, 0, 0, 0, 0, 0, 0.011765, 0.666667, 0.992157, + 0.992157, 0.992157, 0.992157, 0.992157, 0.745098, 0.137255, 0, 0, 0, 0, 0, + 0.152941, 0.866667, 0.992157, 0.992157, 0.521569, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.070588, 0.992157, 0.992157, 0.992157, 0.803922, 0.352941, 0.745098, + 0.992157, 0.945098, 0.317647, 0, 0, 0, 0, 0.580392, 0.992157, 0.992157, + 0.764706, 0.043137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.070588, 0.992157, 0.992157, + 0.776471, 0.043137, 0, 0.007843, 0.27451, 0.882353, 0.941176, 0.176471, 0, + 0, 0.180392, 0.898039, 0.992157, 0.992157, 0.313725, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0.070588, 0.992157, 0.992157, 0.713725, 0, 0, 0, 0, 0.627451, + 0.992157, 0.729412, 0.062745, 0, 0.509804, 0.992157, 0.992157, 0.776471, + 0.035294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.494118, 0.992157, 0.992157, + 0.968627, 0.168627, 0, 0, 0, 0.423529, 0.992157, 0.992157, 0.364706, 0, + 0.717647, 0.992157, 0.992157, 0.317647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.533333, 0.992157, 0.984314, 0.945098, 0.603922, 0, 0, 0, 0.003922, + 0.466667, 0.992157, 0.988235, 0.976471, 0.992157, 0.992157, 0.788235, + 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.686275, 0.882353, 0.364706, 0, + 0, 0, 0, 0, 0, 0.098039, 0.588235, 0.992157, 0.992157, 0.992157, 0.980392, + 0.305882, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.101961, 0.67451, 0.321569, + 0, 0, 0, 0, 0, 0, 0, 0.105882, 0.733333, 0.976471, 0.811765, 0.713725, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.65098, 0.992157, 0.321569, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0.25098, 0.007843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0.94902, 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0.968627, 0.764706, 0.152941, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.498039, 0.25098, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +]], [[ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0.298039, 0.333333, 0.333333, 0.333333, 0.337255, + 0.333333, 0.333333, 0.109804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0.027451, 0.223529, 0.776471, 0.964706, 0.988235, 0.988235, 0.988235, + 0.992157, 0.988235, 0.988235, 0.780392, 0.098039, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0.14902, 0.698039, 0.988235, 0.992157, 0.988235, 0.901961, + 0.87451, 0.568627, 0.882353, 0.976471, 0.988235, 0.988235, 0.501961, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.188235, 0.647059, 0.988235, 0.988235, + 0.745098, 0.439216, 0.098039, 0, 0, 0, 0.572549, 0.988235, 0.988235, + 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.933333, 0.992157, + 0.941176, 0.247059, 0, 0, 0, 0, 0, 0, 0.188235, 0.898039, 0.992157, + 0.992157, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.039216, 0.639216, 0.933333, + 0.988235, 0.913725, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0.113725, 0.843137, + 0.988235, 0.988235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.235294, 0.988235, + 0.992157, 0.988235, 0.815686, 0.07451, 0, 0, 0, 0, 0, 0, 0, 0.333333, + 0.988235, 0.988235, 0.552941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.211765, + 0.878431, 0.988235, 0.992157, 0.701961, 0.329412, 0.109804, 0, 0, 0, 0, 0, + 0, 0, 0.698039, 0.988235, 0.913725, 0.145098, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.188235, 0.890196, 0.988235, 0.988235, 0.745098, 0.047059, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0.882353, 0.988235, 0.568627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, + 0.933333, 0.992157, 0.992157, 0.992157, 0.447059, 0.294118, 0, 0, 0, 0, 0, + 0, 0, 0, 0.447059, 0.992157, 0.768627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.623529, 0.988235, 0.988235, 0.988235, 0.988235, 0.992157, 0.47451, 0, 0, + 0, 0, 0, 0, 0, 0.188235, 0.933333, 0.87451, 0.509804, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0.992157, 0.988235, 0.937255, 0.792157, 0.988235, 0.894118, + 0.082353, 0, 0, 0, 0, 0, 0, 0.027451, 0.647059, 0.992157, 0.654902, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0.623529, 0.988235, 0.913725, 0.329412, 0.376471, + 0.184314, 0, 0, 0, 0, 0, 0, 0.027451, 0.513725, 0.988235, 0.635294, + 0.219608, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.929412, 0.988235, + 0.988235, 0.741176, 0.309804, 0, 0, 0, 0, 0, 0, 0.529412, 0.988235, + 0.678431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.223529, 0.992157, + 0.992157, 1, 0.992157, 0.992157, 0.992157, 0.992157, 1, 0.992157, 0.992157, + 0.882353, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023529, + 0.478431, 0.654902, 0.658824, 0.952941, 0.988235, 0.988235, 0.988235, + 0.992157, 0.988235, 0.729412, 0.278431, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0.196078, 0.647059, 0.764706, 0.764706, 0.768627, + 0.580392, 0.047059, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0 +]]] def main(): conf = parse_config("./mnist_model/trainer_config.py", "") print conf.data_config.load_data_args - network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config) + network = swig_paddle.GradientMachine.createFromConfigProto( + conf.model_config) assert isinstance(network, swig_paddle.GradientMachine) # For code hint. network.loadParameters("./mnist_model/") converter = DataProviderConverter([dense_vector(784)]) diff --git a/doc_cn/concepts/trainer_config.py b/doc_cn/concepts/trainer_config.py index 8d8c79fb39e0c0ddf13aee5d41297506d3404362..3eccbd7bc11f4865130286de718d1be74e4d1722 100644 --- a/doc_cn/concepts/trainer_config.py +++ b/doc_cn/concepts/trainer_config.py @@ -1,23 +1,29 @@ from paddle.trainer_config_helpers import * -define_py_data_sources2(train_list='train.list', - test_list='test.list', - module='provider', - obj='process') +define_py_data_sources2( + train_list='train.list', + test_list='test.list', + module='provider', + obj='process') settings( batch_size=128, learning_rate=1e-3, learning_method=AdamOptimizer(), - regularization=L2Regularization(0.5) -) + regularization=L2Regularization(0.5)) img = data_layer(name='pixel', size=28 * 28) -hidden1 = simple_img_conv_pool(input=img, filter_size=3, num_filters=32, pool_size=3, - num_channel=1) +hidden1 = simple_img_conv_pool( + input=img, filter_size=3, num_filters=32, pool_size=3, num_channel=1) -hidden2 = fc_layer(input=hidden1, size=200, act=TanhActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) +hidden2 = fc_layer( + input=hidden1, + size=200, + act=TanhActivation(), + layer_attr=ExtraAttr(drop_rate=0.5)) predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) -outputs(classification_cost(input=predict, label=data_layer(name='label', size=10))) +outputs( + classification_cost( + input=predict, label=data_layer( + name='label', size=10))) diff --git a/doc_cn/faq/word2vec_config.py b/doc_cn/faq/word2vec_config.py index e347252476eab670abfa2cf2dc126d96b6e04857..866b40c3d4c96c1213b3f716f29b14dd38763edb 100644 --- a/doc_cn/faq/word2vec_config.py +++ b/doc_cn/faq/word2vec_config.py @@ -1,8 +1,12 @@ -... # the settings and define data provider is omitted. -DICT_DIM=3000 # dictionary dimension. -word_ids=data_layer('word_ids', size=DICT_DIM) +... # the settings and the data provider definition are omitted. +DICT_DIM = 3000 # dictionary dimension.
+word_ids = data_layer('word_ids', size=DICT_DIM) -emb = embedding_layer(input=word_ids, size=256, param_attr=ParamAttr(sparse_update=True)) +emb = embedding_layer( + input=word_ids, size=256, param_attr=ParamAttr(sparse_update=True)) emb_sum = pooling_layer(input=emb, pooling_type=SumPooling()) predict = fc_layer(input=emb_sum, size=DICT_DIM, act=Softmax()) -outputs(classification_cost(input=predict, label=data_layer('label', size=DICT_DIM))) \ No newline at end of file +outputs( + classification_cost( + input=predict, label=data_layer( + 'label', size=DICT_DIM))) diff --git a/doc_cn/faq/word2vec_dataprovider.py b/doc_cn/faq/word2vec_dataprovider.py index a0a39080cece90c6c4096bba4396bfa91b3ef759..ec2753a7d01d7dd4d804c3bed0bac1be9c3fb3d3 100644 --- a/doc_cn/faq/word2vec_dataprovider.py +++ b/doc_cn/faq/word2vec_dataprovider.py @@ -1,8 +1,10 @@ -DICT_DIM=3000 +DICT_DIM = 3000 + + @provider(input_types=[integer_sequence(DICT_DIM), integer_value(DICT_DIM)]) def process(settings, filename): - with open(filename) as f: - # yield word ids to predict inner word id - # such as [28, 29, 10, 4], 4 - # It means the sentance is 28, 29, 4, 10, 4. - yield read_next_from_file(f) \ No newline at end of file + with open(filename) as f: + # yield word ids to predict inner word id + # such as [28, 29, 10, 4], 4 + # It means the sentence is 28, 29, 4, 10, 4. + yield read_next_from_file(f) diff --git a/doc_cn/ui/data_provider/mnist_config.py b/doc_cn/ui/data_provider/mnist_config.py index 7ba344338c374a7f9e7e4faa804e2e124577c0be..39becff03b08f5e75b8503aaf01e782d2b0fb3be 100644 --- a/doc_cn/ui/data_provider/mnist_config.py +++ b/doc_cn/ui/data_provider/mnist_config.py @@ -1,8 +1,9 @@ from paddle.trainer_config_helpers import * -define_py_data_sources2(train_list='train.list', - test_list=None, - module='mnist_provider', - obj='process') +define_py_data_sources2( + train_list='train.list', + test_list=None, + module='mnist_provider', + obj='process') img = data_layer(name='pixel', size=784) label = data_layer(name='label', size=10) diff --git a/doc_cn/ui/data_provider/mnist_provider.dict.py b/doc_cn/ui/data_provider/mnist_provider.dict.py index bf13b56372b56a1e810fad159cd51371ef46c468..2ba0b126a0d6239f84950e130410aaaa6e1f24cd 100644 --- a/doc_cn/ui/data_provider/mnist_provider.dict.py +++ b/doc_cn/ui/data_provider/mnist_provider.dict.py @@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import * # Define a py data provider -@provider(input_types={ - 'pixel': dense_vector(28 * 28), - 'label': integer_value(10) -}) +@provider( + input_types={'pixel': dense_vector(28 * 28), + 'label': integer_value(10)}) def process(settings, filename): # settings is not used currently. f = open(filename, 'r') # open one of the training files diff --git a/doc_cn/ui/data_provider/mnist_provider.py b/doc_cn/ui/data_provider/mnist_provider.py index 92f1915c1072562a174a62b436de8f5b39dab2d4..8b828641d55735e67ca634107d5b239150649651 100644 --- a/doc_cn/ui/data_provider/mnist_provider.py +++ b/doc_cn/ui/data_provider/mnist_provider.py @@ -2,10 +2,7 @@ from paddle.trainer.PyDataProvider2 import * # Define a py data provider -@provider(input_types=[ - dense_vector(28 * 28), - integer_value(10) -]) +@provider(input_types=[dense_vector(28 * 28), integer_value(10)]) def process(settings, filename): # settings is not used currently.
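An aside on the two provider styles reformatted above: with a positional input_types list the generator yields a (features, label) tuple in slot order, while the dict form yields values keyed by slot name ('pixel', 'label'). The following is a minimal, framework-free sketch of that contract; the semicolon-separated sample layout is invented for illustration and is not the real MNIST text format.

def parse_sample(line):
    # hypothetical "p1 p2 ... pN;label" layout, for illustration only
    pixels_str, label_str = line.strip().split(';')
    return [float(v) for v in pixels_str.split()], int(label_str)

pixels, label = parse_sample("0.0 0.5 1.0 0.25;7")  # toy 4-pixel sample
print((pixels, label))                    # positional form: tuple in slot order
print({'pixel': pixels, 'label': label})  # dict form: keyed by slot name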
f = open(filename, 'r') # open one of the training files diff --git a/doc_cn/ui/data_provider/sentimental_config.py b/doc_cn/ui/data_provider/sentimental_config.py index 051f75e32b5c0b1f36d27a54c42db94a4682ce7b..7ce71608a2372b2484ae40ccf01f0621728ddef2 100644 --- a/doc_cn/ui/data_provider/sentimental_config.py +++ b/doc_cn/ui/data_provider/sentimental_config.py @@ -3,9 +3,12 @@ from paddle.trainer_config_helpers import * dictionary = dict() ... # read dictionary from outside -define_py_data_sources2(train_list='train.list', test_list=None, - module='sentimental_provider', obj='process', - # above codes same as mnist sample. - args={ # pass to provider. - 'dictionary': dictionary - }) +define_py_data_sources2( + train_list='train.list', + test_list=None, + module='sentimental_provider', + obj='process', + # the code above is the same as in the mnist sample. + args={ # pass to provider. + 'dictionary': dictionary + }) diff --git a/doc_cn/ui/data_provider/sentimental_provider.py b/doc_cn/ui/data_provider/sentimental_provider.py index bda37d7722a0bb98c2c681c790bb308c0e146515..0fb0bb88e95a230f01f18b78ebb37b659c3768f1 100644 --- a/doc_cn/ui/data_provider/sentimental_provider.py +++ b/doc_cn/ui/data_provider/sentimental_provider.py @@ -12,7 +12,8 @@ def on_init(settings, dictionary, **kwargs): # The text is a sequence of integer values, and each value is a word id. # The whole sequence is the sentence whose sentiment we want to # predict. - integer_value(len(dictionary), seq_type=SequenceType), # text input + integer_value( + len(dictionary), seq_type=SequenceType.SEQUENCE), # text input # label positive/negative integer_value(2) diff --git a/paddle/api/__init__.py b/paddle/api/__init__.py index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644 --- a/paddle/api/__init__.py +++ b/paddle/api/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
- diff --git a/paddle/api/paddle_ld_flags.py b/paddle/api/paddle_ld_flags.py index 05d741f8859ba46893bff49681536d9187a3ed6e..ebe00798e8b7169ecbbef53e287ab4b78334bcf9 100644 --- a/paddle/api/paddle_ld_flags.py +++ b/paddle/api/paddle_ld_flags.py @@ -29,7 +29,10 @@ try: whole_start = "" whole_end = "" - LIB_DIRS = ["math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver", "trainer"] + LIB_DIRS = [ + "math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver", + "trainer" + ] PARENT_LIB_DIRS = ['proto'] class PaddleLDFlag(object): @@ -55,19 +58,20 @@ try: self.curt = CUDA_LIBRARIES def ldflag_str(self): - return " ".join([self.libs_dir_str(), - self.parent_dir_str(), - self.libs_str()]) + return " ".join( + [self.libs_dir_str(), self.parent_dir_str(), self.libs_str()]) def libs_dir_str(self): libdirs = LIB_DIRS - return " ".join(map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x), - libdirs)) + return " ".join( + map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x), + libdirs)) def parent_dir_str(self): libdirs = PARENT_LIB_DIRS - return " ".join(map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x), - libdirs)) + return " ".join( + map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x), + libdirs)) def libs_str(self): libs = [ @@ -113,10 +117,10 @@ try: return cmake_flag elif cmake_flag.startswith("-l"): # normal link command return cmake_flag - elif cmake_flag in ["gflags-shared", - "gflags-static", - "gflags_nothreads-shared", - "gflags_nothreads-static"]: # special for gflags + elif cmake_flag in [ + "gflags-shared", "gflags-static", "gflags_nothreads-shared", + "gflags_nothreads-static" + ]: # special for gflags assert PaddleLDFlag.cmake_bool(self.gflags_location) return self.gflags_location elif len(cmake_flag) != 0: @@ -132,18 +136,22 @@ try: :type cmake_str: str :rtype: bool """ - if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith("-NOTFOUND"): + if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith( + "-NOTFOUND"): return False else: return True + def c_flag(self): if self.with_coverage: return ["-fprofile-arcs", "-ftest-coverage", "-O0", "-g"] else: return None except ImportError: + class PaddleLDFlag(object): def ldflag_str(self): pass + def c_flag(self): pass diff --git a/paddle/api/test/testArguments.py b/paddle/api/test/testArguments.py index daedd2409effccba27ff6818fc2603d3e1665bde..70fb169fd5c43d5768e67ad8e4c62a9f4d302eaf 100644 --- a/paddle/api/test/testArguments.py +++ b/paddle/api/test/testArguments.py @@ -32,7 +32,7 @@ class TestArguments(unittest.TestCase): iv = args.getSlotIds(0) assert isinstance(iv, swig_paddle.IVector) np_arr = iv.toNumpyArrayInplace() - self.assertEqual(np_arr.shape, (6,)) + self.assertEqual(np_arr.shape, (6, )) if __name__ == '__main__': diff --git a/paddle/api/test/testGradientMachine.py b/paddle/api/test/testGradientMachine.py index 59b36a012a239730a1d0a5b239a3ba69f0cee1fb..e12613fbb8a66545dd3ad20d59b0b951e86e8683 100644 --- a/paddle/api/test/testGradientMachine.py +++ b/paddle/api/test/testGradientMachine.py @@ -30,8 +30,8 @@ class TestGradientMachine(unittest.TestCase): self.assertIsNotNone(model_config) machine = swig_paddle.GradientMachine.createByModelConfig( model_config, swig_paddle.CREATE_MODE_NORMAL, - swig_paddle.ParameterOptimizer.create( - opt_config).getParameterTypes()) + swig_paddle.ParameterOptimizer.create(opt_config).getParameterTypes( + )) self.assertIsNotNone(machine) ipt, _ = util.loadMNISTTrainData() output = swig_paddle.Arguments.createArguments(0) @@ 
-43,7 +43,7 @@ class TestGradientMachine(unittest.TestCase): assert isinstance(param, swig_paddle.Parameter) val = param.getBuf(swig_paddle.PARAMETER_VALUE) assert isinstance(val, swig_paddle.Vector) - arr = numpy.full((len(val),), 0.1, dtype="float32") + arr = numpy.full((len(val), ), 0.1, dtype="float32") val.copyFromNumpyArray(arr) param_config = param.getConfig().toProto() assert isinstance(param_config, diff --git a/paddle/api/test/testMatrix.py b/paddle/api/test/testMatrix.py index 2216ef30a58b0d97bba210bf0edee02a18264076..11035a9281656c49b6d1757dbac2f7f58cb7d8c8 100644 --- a/paddle/api/test/testMatrix.py +++ b/paddle/api/test/testMatrix.py @@ -69,7 +69,8 @@ class TestMatrix(unittest.TestCase): def test_numpy(self): numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32") m = swig_paddle.Matrix.createCpuDenseFromNumpy(numpy_mat) - self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape) + self.assertEqual( + (int(m.getHeight()), int(m.getWidth())), numpy_mat.shape) # the numpy matrix and paddle matrix shared the same memory. numpy_mat[0, 1] = 342.23 diff --git a/paddle/api/test/testTrain.py b/paddle/api/test/testTrain.py index 7759118a3d9d108f0c05d985ac74a5122799ccb4..a3ba4eaaa69b39b75e7ece3095b6f236c1248d41 100644 --- a/paddle/api/test/testTrain.py +++ b/paddle/api/test/testTrain.py @@ -98,7 +98,8 @@ def main(): cost_vec = outArgs.getSlotValue(0) assert isinstance(cost_vec, swig_paddle.Matrix) cost_vec = cost_vec.copyToNumpyMat() - print 'Finish Batch', batch_id, 'with cost ', cost_vec.sum() / batch_size + print 'Finish Batch', batch_id, 'with cost ', cost_vec.sum( + ) / batch_size batch_id += 1 for optimizer in optimizers: diff --git a/paddle/api/test/testTrainConfig.py b/paddle/api/test/testTrainConfig.py index 22148e31915da0c21609fe0694274cfaee4b3950..77e0cd37d566d2571fada76b9948a9b0616ad044 100644 --- a/paddle/api/test/testTrainConfig.py +++ b/paddle/api/test/testTrainConfig.py @@ -1,9 +1,6 @@ from paddle.trainer_config_helpers import * -settings( - batch_size=100, - learning_method=AdamOptimizer() -) +settings(batch_size=100, learning_method=AdamOptimizer()) din = data_layer(name='input', size=784) diff --git a/paddle/api/test/testTrainer.py b/paddle/api/test/testTrainer.py index da69a60f84f4d7c6fad54fc116a31b54ef162f60..edd5a2da5785c405b46c2559ee93837ac68d7c3a 100644 --- a/paddle/api/test/testTrainer.py +++ b/paddle/api/test/testTrainer.py @@ -17,9 +17,9 @@ from paddle.trainer.config_parser import logger from py_paddle import swig_paddle import util + def main(): - trainer_config = parse_config( - "./testTrainConfig.py", "") + trainer_config = parse_config("./testTrainConfig.py", "") model = swig_paddle.GradientMachine.createFromConfigProto( trainer_config.model_config) trainer = swig_paddle.Trainer.create(trainer_config, model) @@ -56,7 +56,7 @@ def main(): logger.info('test cost=%f' % (cost / num)) trainer.finishTrain() - + if __name__ == '__main__': swig_paddle.initPaddle("--use_gpu=0", "--trainer_count=1") diff --git a/paddle/api/test/testVector.py b/paddle/api/test/testVector.py index f5b5d0e32e4208e7becb9755d1aed131f52ff146..5226df79eea3bedbf2b5b6f5fa684cc99a194f7c 100644 --- a/paddle/api/test/testVector.py +++ b/paddle/api/test/testVector.py @@ -112,5 +112,6 @@ class TestVector(unittest.TestCase): if __name__ == '__main__': - swig_paddle.initPaddle("--use_gpu=1" if swig_paddle.isGpuVersion() else "--use_gpu=0") + swig_paddle.initPaddle("--use_gpu=1" + if swig_paddle.isGpuVersion() else "--use_gpu=0") unittest.main() diff --git 
a/paddle/gserver/tests/__init__.py b/paddle/gserver/tests/__init__.py index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644 --- a/paddle/gserver/tests/__init__.py +++ b/paddle/gserver/tests/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/paddle/gserver/tests/pyDataProvider.py b/paddle/gserver/tests/pyDataProvider.py index c3155e7adea04d472cca2ed74ec31c22bc2d1586..91863b4175b1a58cb7d475732f293f32a3a6ed5a 100644 --- a/paddle/gserver/tests/pyDataProvider.py +++ b/paddle/gserver/tests/pyDataProvider.py @@ -16,72 +16,79 @@ import numpy import struct import traceback + def header_creator(): ret = "" - ret += struct.pack('i', 3) # slot num - ret += struct.pack('i', 1) # sequence flag - ret += struct.pack('i', 0) # slot0 dense type - ret += struct.pack('i', 3) # slot0 dim - ret += struct.pack('i', 1) # slot1 sparse non value type - ret += struct.pack('i', 7) # slot1 dim - ret += struct.pack('i', 3) # slot2 index type - ret += struct.pack('i', 2) # slot2 dim + ret += struct.pack('i', 3) # slot num + ret += struct.pack('i', 1) # sequence flag + ret += struct.pack('i', 0) # slot0 dense type + ret += struct.pack('i', 3) # slot0 dim + ret += struct.pack('i', 1) # slot1 sparse non value type + ret += struct.pack('i', 7) # slot1 dim + ret += struct.pack('i', 3) # slot2 index type + ret += struct.pack('i', 2) # slot2 dim return ret + def dense_value_creator(sample_num): ret = "" - ret += struct.pack('i', sample_num) # slot0 sample num - for i in range(sample_num): # slot0 value + ret += struct.pack('i', sample_num) # slot0 sample num + for i in range(sample_num): # slot0 value ret += struct.pack('f', 1.0) ret += struct.pack('f', 2.0) ret += struct.pack('f', 3.0) return ret + def sparse_value_creator(sample_num): ret = "" - ret += struct.pack('i', sample_num) # slot1 sample num - for i in range(sample_num): # slot1 index + ret += struct.pack('i', sample_num) # slot1 sample num + for i in range(sample_num): # slot1 index ret += struct.pack('i', i * 2) - ret += struct.pack('i', sample_num * 2) #slot1 length - for i in range(sample_num): # slot1 value + ret += struct.pack('i', sample_num * 2) #slot1 length + for i in range(sample_num): # slot1 value ret += struct.pack('i', 1) ret += struct.pack('i', 2) return ret + def index_value_creator(sample_num): ret = "" - ret += struct.pack('i', sample_num) # slot2 sample num - for i in range(sample_num): # slot2 value + ret += struct.pack('i', sample_num) # slot2 sample num + for i in range(sample_num): # slot2 value ret += struct.pack('i', 0) return ret + def sequenceStartPositions_creator(): ret = "" - ret += struct.pack('i', 2) # slot0 sequence num - ret += struct.pack('i', 0) # slot0 sequence value1 - ret += struct.pack('i', 1) # slot0 sequence value2 - ret += struct.pack('i', 1) # slot1 sequence num - ret += struct.pack('i', 0) # slot1 sequence value1 - ret += struct.pack('i', 2) # slot2 sequence num - ret += struct.pack('i', 0) # slot2 sequence value1 - ret += struct.pack('i', 1) # slot2 sequence value2 + ret += struct.pack('i', 2) # slot0 sequence num + ret += struct.pack('i', 0) # slot0 sequence value1 + ret += struct.pack('i', 1) # slot0 sequence value2 + ret += struct.pack('i', 1) # slot1 sequence num + ret += struct.pack('i', 0) # slot1 sequence value1 + ret += struct.pack('i', 2) # slot2 sequence num + ret += struct.pack('i', 0) # slot2 
sequence value1 + ret += struct.pack('i', 1) # slot2 sequence value2 return ret + def subSequenceStartPositions_creator(): ret = "" - ret += struct.pack('i', 3) # slot0 subsequence num - ret += struct.pack('i', 0) # slot0 subsequence value1 - ret += struct.pack('i', 1) # slot0 subsequence value2 - ret += struct.pack('i', 2) # slot0 subsequence value3 - ret += struct.pack('i', 2) # slot1 subsequence num - ret += struct.pack('i', 0) # slot1 subsequence value1 - ret += struct.pack('i', 1) # slot1 subsequence value2 - ret += struct.pack('i', 3) # slot2 subsequence num - ret += struct.pack('i', 0) # slot2 subsequence value1 - ret += struct.pack('i', 1) # slot2 subsequence value2 - ret += struct.pack('i', 2) # slot2 subsequence value3 + ret += struct.pack('i', 3) # slot0 subsequence num + ret += struct.pack('i', 0) # slot0 subsequence value1 + ret += struct.pack('i', 1) # slot0 subsequence value2 + ret += struct.pack('i', 2) # slot0 subsequence value3 + ret += struct.pack('i', 2) # slot1 subsequence num + ret += struct.pack('i', 0) # slot1 subsequence value1 + ret += struct.pack('i', 1) # slot1 subsequence value2 + ret += struct.pack('i', 3) # slot2 subsequence num + ret += struct.pack('i', 0) # slot2 subsequence value1 + ret += struct.pack('i', 1) # slot2 subsequence value2 + ret += struct.pack('i', 2) # slot2 subsequence value3 return ret + class SimpleDataProvider: def __init__(self, *file_list): self.file_list = file_list @@ -93,17 +100,18 @@ class SimpleDataProvider: pass def getHeader(self): - return header_creator() + return header_creator() def getNextBatch(self, batch_size): ret = "" - ret += struct.pack('i', 2) # batch size - ret += dense_value_creator(2) # slot0 - ret += sparse_value_creator(2) # slot1 - ret += index_value_creator(2) # slot2 + ret += struct.pack('i', 2) # batch size + ret += dense_value_creator(2) # slot0 + ret += sparse_value_creator(2) # slot1 + ret += index_value_creator(2) # slot2 ret += sequenceStartPositions_creator() return ret + class SimpleNestDataProvider: def __init__(self, *file_list): self.file_list = file_list @@ -119,14 +127,15 @@ class SimpleNestDataProvider: def getNextBatch(self, batch_size): ret = "" - ret += struct.pack('i', 2) # batch size - ret += dense_value_creator(4) # slot0 - ret += sparse_value_creator(4) # slot1 - ret += index_value_creator(4) # slot2 + ret += struct.pack('i', 2) # batch size + ret += dense_value_creator(4) # slot0 + ret += sparse_value_creator(4) # slot1 + ret += index_value_creator(4) # slot2 ret += sequenceStartPositions_creator() ret += subSequenceStartPositions_creator() return ret + if __name__ == "__main__": # test code data_provider = SimpleDataProvider('./test_batch') diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py index 321c78cb1741bcfcbd7df2fd83ff6ba5ba910971..715ac08a42d05cec9c7f4b09a0447d44835d417d 100644 --- a/paddle/gserver/tests/rnn_data_provider.py +++ b/paddle/gserver/tests/rnn_data_provider.py @@ -22,18 +22,20 @@ data = [ [[[0, 2], [2, 5], [0, 1, 2]], 1], ] + # Used for sequence_nest_rnn.conf -@provider(input_types=[integer_value_sub_sequence(10), - integer_value(3)], - should_shuffle=False) +@provider( + input_types=[integer_value_sub_sequence(10), integer_value(3)], + should_shuffle=False) def process_subseq(settings, file_name): for d in data: yield d + # Used for sequence_rnn.conf -@provider(input_types=[integer_value_sequence(10), - integer_value(3)], - should_shuffle=False) +@provider( + input_types=[integer_value_sequence(10), integer_value(3)], + 
should_shuffle=False) def process_seq(settings, file_name): for d in data: seq = [] @@ -41,18 +43,20 @@ def process_seq(settings, file_name): seq += subseq yield seq, d[1] + # Used for sequence_nest_rnn_multi_input.conf -@provider(input_types=[integer_value_sub_sequence(10), - integer_value(3)], - should_shuffle=False) +@provider( + input_types=[integer_value_sub_sequence(10), integer_value(3)], + should_shuffle=False) def process_subseq2(settings, file_name): for d in data: yield d + # Used for sequence_rnn_multi_input.conf -@provider(input_types=[integer_value_sequence(10), - integer_value(3)], - should_shuffle=False) +@provider( + input_types=[integer_value_sequence(10), integer_value(3)], + should_shuffle=False) def process_seq2(settings, file_name): for d in data: seq = [] @@ -60,31 +64,34 @@ def process_seq2(settings, file_name): seq += subseq yield seq, d[1] + ########################################################### data2 = [ - [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]] ,0], - [[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]], 1], + [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]], 0], + [[[0, 2], [2, 5], [0, 1, 2]], [[1, 5], [4], [2, 3, 6, 1]], 1], ] + # Used for sequence_nest_rnn_multi_unequalength_inputs.conf -@provider(input_types=[integer_value_sub_sequence(10), - integer_value_sub_sequence(10), - integer_value(2)], - should_shuffle=False) +@provider( + input_types=[ + integer_value_sub_sequence(10), integer_value_sub_sequence(10), + integer_value(2) + ], + should_shuffle=False) def process_unequalength_subseq(settings, file_name): for d in data2: yield d # Used for sequence_rnn_multi_unequalength_inputs.conf -@provider(input_types=[integer_value_sequence(10), - integer_value_sequence(10), - integer_value(2)], - should_shuffle=False) +@provider( + input_types=[ + integer_value_sequence(10), integer_value_sequence(10), integer_value(2) + ], + should_shuffle=False) def process_unequalength_seq(settings, file_name): for d in data2: - words1=reduce(lambda x,y: x+y, d[0]) - words2=reduce(lambda x,y: x+y, d[1]) + words1 = reduce(lambda x, y: x + y, d[0]) + words2 = reduce(lambda x, y: x + y, d[1]) yield words1, words2, d[2] - - diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/gserver/tests/sequenceGen.py index b166e778d7a33f444b91d6b37c74352a72f4ac10..fab876fd30da0a80774d06028ae2321e12354d59 100644 --- a/paddle/gserver/tests/sequenceGen.py +++ b/paddle/gserver/tests/sequenceGen.py @@ -20,8 +20,9 @@ from paddle.trainer.PyDataProvider2 import * def hook(settings, dict_file, **kwargs): settings.word_dict = dict_file - settings.input_types = [integer_value_sequence(len(settings.word_dict)), - integer_value(3)] + settings.input_types = [ + integer_value_sequence(len(settings.word_dict)), integer_value(3) + ] settings.logger.info('dict len : %d' % (len(settings.word_dict))) @@ -32,16 +33,19 @@ def process(settings, file_name): label, comment = line.strip().split('\t') label = int(''.join(label.split())) words = comment.split() - word_slot = [settings.word_dict[w] for w in words if - w in settings.word_dict] + word_slot = [ + settings.word_dict[w] for w in words if w in settings.word_dict + ] yield word_slot, label ## for hierarchical sequence network def hook2(settings, dict_file, **kwargs): settings.word_dict = dict_file - settings.input_types = [integer_value_sub_sequence(len(settings.word_dict)), - integer_value_sequence(3)] + settings.input_types = [ + integer_value_sub_sequence(len(settings.word_dict)), + integer_value_sequence(3) + ] settings.logger.info('dict len : %d' % 
(len(settings.word_dict))) @@ -55,8 +59,10 @@ def process2(settings, file_name): label, comment = line.strip().split('\t') label = int(''.join(label.split())) words = comment.split() - word_slot = [settings.word_dict[w] for w in words if - w in settings.word_dict] + word_slot = [ + settings.word_dict[w] for w in words + if w in settings.word_dict + ] label_list.append(label) word_slot_list.append(word_slot) else: diff --git a/paddle/gserver/tests/sequence_layer_group.conf b/paddle/gserver/tests/sequence_layer_group.conf index ac031b31280df297246c1ea2e279fc2c595bd8b7..087aa96ccb5a7fc2b6d4f5ce81de4e820580570a 100644 --- a/paddle/gserver/tests/sequence_layer_group.conf +++ b/paddle/gserver/tests/sequence_layer_group.conf @@ -21,15 +21,16 @@ dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count -define_py_data_sources2(train_list='gserver/tests/Sequence/train.list', - test_list=None, - module='sequenceGen', - obj='process', - args={"dict_file":dict_file}) +define_py_data_sources2( + train_list='gserver/tests/Sequence/train.list', + test_list=None, + module='sequenceGen', + obj='process', + args={"dict_file": dict_file}) settings(batch_size=5) ######################## network configure ################################ -dict_dim = len(open(dict_path,'r').readlines()) +dict_dim = len(open(dict_path, 'r').readlines()) word_dim = 128 hidden_dim = 256 label_dim = 3 @@ -39,21 +40,24 @@ data = data_layer(name="word", size=dict_dim) emb = embedding_layer(input=data, size=word_dim) # (lstm_input + lstm) is equal to lstmemory -with mixed_layer(size=hidden_dim*4) as lstm_input: +with mixed_layer(size=hidden_dim * 4) as lstm_input: lstm_input += full_matrix_projection(input=emb) -lstm = lstmemory_group(input=lstm_input, - size=hidden_dim, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation(), - lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) +lstm = lstmemory_group( + input=lstm_input, + size=hidden_dim, + act=TanhActivation(), + gate_act=SigmoidActivation(), + state_act=TanhActivation(), + lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) lstm_last = last_seq(input=lstm) -with mixed_layer(size=label_dim, - act=SoftmaxActivation(), - bias_attr=True) as output: +with mixed_layer( + size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: output += full_matrix_projection(input=lstm_last) -outputs(classification_cost(input=output, label=data_layer(name="label", size=1))) +outputs( + classification_cost( + input=output, label=data_layer( + name="label", size=1))) diff --git a/paddle/gserver/tests/sequence_nest_layer_group.conf b/paddle/gserver/tests/sequence_nest_layer_group.conf index 38c60b657b969f9fbcf46a00c542fa100da5a877..93a0f6da7905c0b00cf70296143ded2d4431e430 100644 --- a/paddle/gserver/tests/sequence_nest_layer_group.conf +++ b/paddle/gserver/tests/sequence_nest_layer_group.conf @@ -21,15 +21,16 @@ dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count -define_py_data_sources2(train_list='gserver/tests/Sequence/train.list.nest', - test_list=None, - module='sequenceGen', - obj='process2', - args={"dict_file":dict_file}) +define_py_data_sources2( + train_list='gserver/tests/Sequence/train.list.nest', + test_list=None, + module='sequenceGen', + obj='process2', + args={"dict_file": dict_file}) settings(batch_size=2) ######################## network configure ################################ -dict_dim = 
len(open(dict_path,'r').readlines()) +dict_dim = len(open(dict_path, 'r').readlines()) word_dim = 128 hidden_dim = 256 label_dim = 3 @@ -38,37 +39,46 @@ data = data_layer(name="word", size=dict_dim) emb_group = embedding_layer(input=data, size=word_dim) + # (lstm_input + lstm) is equal to lstmemory def lstm_group(lstm_group_input): - with mixed_layer(size=hidden_dim*4) as group_input: - group_input += full_matrix_projection(input=lstm_group_input) + with mixed_layer(size=hidden_dim * 4) as group_input: + group_input += full_matrix_projection(input=lstm_group_input) - lstm_output = lstmemory_group(input=group_input, - name="lstm_group", - size=hidden_dim, - act=TanhActivation(), - gate_act=SigmoidActivation(), - state_act=TanhActivation(), - lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) + lstm_output = lstmemory_group( + input=group_input, + name="lstm_group", + size=hidden_dim, + act=TanhActivation(), + gate_act=SigmoidActivation(), + state_act=TanhActivation(), + lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50)) return lstm_output -lstm_nest_group = recurrent_group(input=SubsequenceInput(emb_group), - step=lstm_group, - name="lstm_nest_group") + +lstm_nest_group = recurrent_group( + input=SubsequenceInput(emb_group), step=lstm_group, name="lstm_nest_group") # hasSubseq ->(seqlastins) seq -lstm_last = last_seq(input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE) +lstm_last = last_seq( + input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE) # seq ->(expand) hasSubseq -lstm_expand = expand_layer(input=lstm_last, expand_as=emb_group, expand_level=ExpandLevel.FROM_SEQUENCE) +lstm_expand = expand_layer( + input=lstm_last, + expand_as=emb_group, + expand_level=ExpandLevel.FROM_SEQUENCE) # hasSubseq ->(average) seq -lstm_average = pooling_layer(input=lstm_expand, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.EACH_SEQUENCE) +lstm_average = pooling_layer( + input=lstm_expand, + pooling_type=AvgPooling(), + agg_level=AggregateLevel.EACH_SEQUENCE) -with mixed_layer(size=label_dim, - act=SoftmaxActivation(), - bias_attr=True) as output: +with mixed_layer( + size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: output += full_matrix_projection(input=lstm_average) -outputs(classification_cost(input=output, label=data_layer(name="label", size=1))) +outputs( + classification_cost( + input=output, label=data_layer( + name="label", size=1))) diff --git a/paddle/gserver/tests/test_PyDataProvider2.py b/paddle/gserver/tests/test_PyDataProvider2.py index 71c3335231e52132e6c7e9aaf0cb92d0db2e20df..7ca30198fb1d0e7384db2c28524c7898dcd27e50 100644 --- a/paddle/gserver/tests/test_PyDataProvider2.py +++ b/paddle/gserver/tests/test_PyDataProvider2.py @@ -33,16 +33,19 @@ def test_init_hooker(setting, value, **kwargs): setting.value = value -@provider(input_types=[dense_vector(20, seq_type=SequenceType.NO_SEQUENCE)], - init_hook=test_init_hooker) +@provider( + input_types=[dense_vector( + 20, seq_type=SequenceType.NO_SEQUENCE)], + init_hook=test_init_hooker) def test_init_hook(setting, filename): for i in xrange(200): yield setting.value -@provider( - input_types=[ - sparse_binary_vector(30000, seq_type=SequenceType.NO_SEQUENCE)]) +@provider(input_types=[ + sparse_binary_vector( + 30000, seq_type=SequenceType.NO_SEQUENCE) +]) def test_sparse_non_value_no_seq(setting, filename): for i in xrange(200): yield [(i + 1) * (j + 1) for j in xrange(10)] @@ -77,28 +80,28 @@ def test_min_pool_size(setting, filename): yield random.randint(0, 100 - 1) 
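The next hunk reformats test_can_over_batch_size, where calc_batch_size=lambda x: len(x[0]) prices each sample by the length of its first (sequence) slot rather than counting it as one unit, and can_over_batch_size=False then forbids a batch from exceeding batch_size in those units. A standalone sketch of that accounting, under the assumption that this reading of the two options is correct:

def fill_batch(samples, batch_size, calc_batch_size=lambda x: len(x[0])):
    batch, used = [], 0
    for sample in samples:
        cost = calc_batch_size(sample)  # timesteps, not sample count
        if batch and used + cost > batch_size:
            break  # can_over_batch_size=False: never exceed batch_size
        batch.append(sample)
        used += cost
    return batch, used

seqs = [([1, 2, 3],), ([4, 5],), ([6, 7, 8, 9],)]  # toy sequence samples
print(fill_batch(seqs, batch_size=6))  # -> ([([1, 2, 3],), ([4, 5],)], 5)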
-@provider(input_types=[index_slot(100, seq_type=SequenceType.SEQUENCE)], - can_over_batch_size=False, - calc_batch_size=lambda x: len(x[0])) +@provider( + input_types=[index_slot( + 100, seq_type=SequenceType.SEQUENCE)], + can_over_batch_size=False, + calc_batch_size=lambda x: len(x[0])) def test_can_over_batch_size(setting, filename): for _ in xrange(1 << 10): seq_len = random.randint(0, 99) yield [random.randint(0, 100 - 1) for _ in xrange(seq_len)] -@provider(input_types={'input1':index_slot(10), 'input2': index_slot(10)}) +@provider(input_types={'input1': index_slot(10), 'input2': index_slot(10)}) def test_input_order(setting, filename): for _ in xrange(1000): - yield { - 'input1': 0, - 'input2': 1 - } + yield {'input1': 0, 'input2': 1} -@provider(input_types=[index_slot(10)], - check=True, - check_fail_continue=True, - should_shuffle="123") # also test should shuffle +@provider( + input_types=[index_slot(10)], + check=True, + check_fail_continue=True, + should_shuffle="123") # also test should shuffle def test_check(settings, filename): yield_good_value = False @@ -108,4 +111,3 @@ def test_check(settings, filename): if i < 10: yield_good_value = True yield i - diff --git a/paddle/py_paddle/__init__.py b/paddle/py_paddle/__init__.py index f372068942ea36a05c1433b482731bf112bfa51e..f8399f9c63d81f5a52bf2b277789c26d809f0153 100644 --- a/paddle/py_paddle/__init__.py +++ b/paddle/py_paddle/__init__.py @@ -15,9 +15,10 @@ from util import DataProviderWrapperConverter from dataprovider_converter import DataProviderConverter -__all__ = ['paddle', - 'DataProviderConverter', - 'DataProviderWrapperConverter', # for deprecated usage. - 'loadParameterFile'] +__all__ = [ + 'paddle', + 'DataProviderConverter', + 'DataProviderWrapperConverter', # for deprecated usage. 
+ 'loadParameterFile' +] util.monkeypatches() - diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index dd2e146d112c055a68c8279417ce07d06fa10a7e..d64c7b20cb65a4b8dfebfc516cfc2c3fdc247114 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -45,10 +45,8 @@ class DenseScanner(IScanner): def finish_scan(self, argument): assert isinstance(argument, swig_paddle.Arguments) assert isinstance(self.input_type, dp2.InputType) - m = swig_paddle.Matrix.createDense(self.__mat__, - self.__height__, - self.input_type.dim, - False) + m = swig_paddle.Matrix.createDense(self.__mat__, self.__height__, + self.input_type.dim, False) argument.setSlotValue(self.pos, m) @@ -141,8 +139,10 @@ class DataProviderConverter(object): assert isinstance(argument, swig_paddle.Arguments) argument.resize(len(self.input_types)) - scanners = [DataProviderConverter.create_scanner(i, each_type) - for i, each_type in enumerate(self.input_types)] + scanners = [ + DataProviderConverter.create_scanner(i, each_type) + for i, each_type in enumerate(self.input_types) + ] for each_sample in dat: for each_step, scanner in zip(each_sample, scanners): @@ -171,11 +171,14 @@ class DataProviderConverter(object): assert retv is not None if each.seq_type == dp2.SequenceType.SUB_SEQUENCE: - retv = SequenceScanner(each, i, retv, lambda a, p, seq: - a.setSlotSubSequenceStartPositions(p, seq)) - - if each.seq_type in [dp2.SequenceType.SUB_SEQUENCE, - dp2.SequenceType.SEQUENCE]: - retv = SequenceScanner(each, i, retv, lambda a, p, seq: - a.setSlotSequenceStartPositions(p, seq)) + retv = SequenceScanner( + each, i, retv, + lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq)) + + if each.seq_type in [ + dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE + ]: + retv = SequenceScanner( + each, i, retv, + lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq)) return retv diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index 53f67a861e7d972648cfd22f451c6e56fa5aa149..8ebcb346100c297948e0eb9a147c866f6bbca15d 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Some Useful method for py_paddle. 
""" @@ -79,6 +78,7 @@ class __ParameterCallbackWrapper__(swig_paddle.UpdateCallback): else: return __ParameterCallbackWrapper__(callback).__disown__() + def __arguments_to_numpy__(i, arg): assert isinstance(arg, swig_paddle.Arguments) value = arg.getSlotValue(i) @@ -89,10 +89,8 @@ def __arguments_to_numpy__(i, arg): if ids is not None: assert isinstance(ids, swig_paddle.IVector) ids = ids.copyToNumpyArray() - return { - "value": value, - "id": ids - } + return {"value": value, "id": ids} + def __monkeypatch_gradient_machine__(): """ @@ -102,7 +100,6 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.loadFromConfigFile = \ staticmethod(loadGradientMachine) - def __matrix_to_numpy__(m): if isinstance(m, swig_paddle.Matrix): return m.copyToNumpyMat() @@ -113,9 +110,11 @@ def __monkeypatch_gradient_machine__(): def createFromConfigProto(protoObj, createMode=swig_paddle.CREATE_MODE_NORMAL, - paramTypes=[swig_paddle.PARAMETER_VALUE, - swig_paddle.PARAMETER_GRADIENT, - swig_paddle.PARAMETER_MOMENTUM]): + paramTypes=[ + swig_paddle.PARAMETER_VALUE, + swig_paddle.PARAMETER_GRADIENT, + swig_paddle.PARAMETER_MOMENTUM + ]): """ Create Gradient Machine From Proto object. :param protoObj: Model config @@ -145,8 +144,10 @@ def __monkeypatch_gradient_machine__(): """ outArgs = swig_paddle.Arguments.createArguments(0) self.forward(inArgs, outArgs, swig_paddle.PASS_TEST) - return [__arguments_to_numpy__(i, outArgs) for i in xrange( - outArgs.getSlotNum())] + return [ + __arguments_to_numpy__(i, outArgs) + for i in xrange(outArgs.getSlotNum()) + ] swig_paddle.GradientMachine.forwardTest = forwardTest @@ -167,7 +168,10 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.__forwardBackward__ = \ swig_paddle.GradientMachine.forwardBackward - def forwardBackward(self, inArgs, outArgs, passType, + def forwardBackward(self, + inArgs, + outArgs, + passType, callback=swig_paddle.UpdateCallback()): """ GradientMachine forward backward. 
@@ -315,9 +319,8 @@ class DataProviderWrapperConverter(object): self.cols += other def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1, - self.dim, - len(self.cols), True) + mat = swig_paddle.Matrix.createSparse( + len(self.indices) - 1, self.dim, len(self.cols), True) assert isinstance(mat, swig_paddle.Matrix) mat.sparseCopyFrom(self.indices, self.cols) self.putIntoArg(slot_idx, arg, mat) @@ -341,9 +344,8 @@ class DataProviderWrapperConverter(object): self.values += map(lambda x: x[1], other) def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1, - self.dim, - len(self.cols), False) + mat = swig_paddle.Matrix.createSparse( + len(self.indices) - 1, self.dim, len(self.cols), False) assert isinstance(mat, swig_paddle.Matrix) mat.sparseCopyFrom(self.indices, self.cols, self.values) self.putIntoArg(slot_idx, arg, mat) @@ -352,8 +354,9 @@ class DataProviderWrapperConverter(object): paddle.trainer.PyDataProviderWrapper.DenseSlot: DenseValueConverter, paddle.trainer.PyDataProviderWrapper.IndexSlot: IdValueConverter, paddle.trainer.PyDataProviderWrapper.SparseNonValueSlot: - SparseNonValueConverter, - paddle.trainer.PyDataProviderWrapper.SparseValueSlot: SparseValueConverter + SparseNonValueConverter, + paddle.trainer.PyDataProviderWrapper.SparseValueSlot: + SparseValueConverter } def __init__(self, use_seq, header): @@ -381,10 +384,9 @@ class DataProviderWrapperConverter(object): assert isinstance(argument, swig_paddle.Arguments) argument.resize(len(self.__header__)) - values = map(lambda x: - DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[ - x.__class__](x), - self.__header__) + values = map( + lambda x: DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[x.__class__](x), + self.__header__) if self.__use_seq__: seq_dim = [[] for _ in xrange(self.__header__.__len__())] @@ -394,14 +396,13 @@ class DataProviderWrapperConverter(object): for slot_idx, sequence in enumerate(each_sample): for raw_data in sequence: values[slot_idx].append(raw_data) - seq_start_pos[slot_idx].append( - seq_start_pos[slot_idx][-1] + len(sequence)) + seq_start_pos[slot_idx].append(seq_start_pos[slot_idx][-1] + + len(sequence)) seq_dim[slot_idx].append(len(sequence)) for slot_idx in xrange(len(self.__header__)): - argument.setSlotSequenceDim(slot_idx, - swig_paddle.IVector.create( - seq_dim[slot_idx])) + argument.setSlotSequenceDim( + slot_idx, swig_paddle.IVector.create(seq_dim[slot_idx])) argument.setSlotSequenceStartPositions( slot_idx, swig_paddle.IVector.create(seq_start_pos[slot_idx])) @@ -422,7 +423,6 @@ class DataProviderWrapperConverter(object): return self.convert(wrapper_data, argument) - def __monkey_patch_protobuf_objects__(): def ParameterConfig_toProto(self): """ @@ -459,8 +459,7 @@ def __monkey_patch_protobuf_objects__(): :return: paddle.OptimizationConfig """ - assert isinstance(protoObj, - paddle.proto.OptimizationConfig) + assert isinstance(protoObj, paddle.proto.OptimizationConfig) return swig_paddle.OptimizationConfig.createFromProtoString( protoObj.SerializeToString()) @@ -475,8 +474,7 @@ def __monkey_patch_protobuf_objects__(): :param protoObj: proto.TrainerConfig :return: paddle.TrainerConfig """ - assert isinstance(protoObj, - paddle.proto.TrainerConfig) + assert isinstance(protoObj, paddle.proto.TrainerConfig) return swig_paddle.TrainerConfig.createFromProtoString( protoObj.SerializeToString()) @@ -537,6 +535,7 @@ def __monkey_patch_trainer__(): assert isinstance(model, 
swig_paddle.GradientMachine) return swig_paddle.Trainer.__create__( swig_paddle.TrainerConfig.createFromProto(config), model) + swig_paddle.Trainer.create = staticmethod(Trainer_create) swig_paddle.Trainer.__getForwardOutput__ = \ @@ -551,14 +550,19 @@ def __monkey_patch_trainer__(): numpy.ndarray. """ outArgs = self.__getForwardOutput__() - return [__arguments_to_numpy__(i, outArgs) for i in xrange( - outArgs.getSlotNum())] + return [ + __arguments_to_numpy__(i, outArgs) + for i in xrange(outArgs.getSlotNum()) + ] swig_paddle.Trainer.getForwardOutput = getForwardOutput + def monkeypatches(): - patches = [__monkeypatch_init_paddle__, __monkeypatch_gradient_machine__, - __monkey_patch_protobuf_objects__, - __monkey_patch_parameter__, __monkey_patch_trainer__] + patches = [ + __monkeypatch_init_paddle__, __monkeypatch_gradient_machine__, + __monkey_patch_protobuf_objects__, __monkey_patch_parameter__, + __monkey_patch_trainer__ + ] for patch in patches: patch() diff --git a/paddle/scripts/cluster_train/conf.py b/paddle/scripts/cluster_train/conf.py index c8fd360e7552ed7c0f11aaa06574a11344c44aba..f1114a59201b9e57a14b739a327b622327c515f7 100644 --- a/paddle/scripts/cluster_train/conf.py +++ b/paddle/scripts/cluster_train/conf.py @@ -13,17 +13,14 @@ # limitations under the License. HOSTS = [ - "root@192.168.100.17", - "root@192.168.100.18", - ] - + "root@192.168.100.17", + "root@192.168.100.18", +] ''' workspace configuration ''' #root dir for the workspace; can be set to any directory owned by a real user account ROOT_DIR = "/home/paddle" - - ''' network configuration ''' @@ -37,4 +34,4 @@ PADDLE_PORTS_NUM = 2 PADDLE_PORTS_NUM_FOR_SPARSE = 2 #environment settings for all processes in the cluster job -LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/lib64" +LD_LIBRARY_PATH = "/usr/local/cuda/lib64:/usr/lib64" diff --git a/paddle/scripts/cluster_train/paddle.py b/paddle/scripts/cluster_train/paddle.py index 79698c72e619fa48c42d91d41abab61e2a5902ee..7343a600c1bf5522ac8b0cd90a38f8a362ba7ae6 100644 --- a/paddle/scripts/cluster_train/paddle.py +++ b/paddle/scripts/cluster_train/paddle.py @@ -12,8 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """ module for launching cluster job """ import os @@ -23,13 +21,13 @@ import copy import time import signal - from fabric.api import run, put, settings, env, prefix from fabric.tasks import execute #configuration for cluster import conf + def refine_unknown_args(cmd_args): ''' refine unknown parameters, rewriting "--key=value" into "--key value" pairs @@ -37,7 +35,7 @@ new_args = [] for arg in cmd_args: if arg.startswith("--") and arg.find("=") != -1: - equal_pos = arg.find("=") #find first = pos + equal_pos = arg.find("=") #find the position of the first '=' arglist = list(arg) arglist[equal_pos] = " " arg = "".join(arglist) @@ -50,6 +48,7 @@ new_args.append(arg) return new_args + def kill_process(): ''' kill stray paddle processes @@ -60,6 +59,7 @@ | awk '{print $2}' \ | xargs kill > /dev/null 2>&1") + def job_prepare(jobdir, data=None): ''' prepare job related workspace data @@ -70,6 +70,7 @@ This function just prepares all related models and other resources needed at runtime. ''' + def job_create_workspace(jobdir, data=None): ''' prepare job workspace, common file, etc.
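The refine_unknown_args hunks above split each unknown "--key=value" token at the first '=' so it can be forwarded as a "--key value" pair. A simplified standalone re-implementation for illustration (the original's extra pass-through branches are collapsed into the else case):

def refine(cmd_args):
    out = []
    for arg in cmd_args:
        if arg.startswith('--') and '=' in arg:
            key, value = arg.split('=', 1)  # split at the first '=' only
            out.extend([key, value])
        else:
            out.append(arg)
    return out

print(refine(['--use_gpu=0', '--port', '7164']))
# -> ['--use_gpu', '0', '--port', '7164']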
@@ -94,7 +95,8 @@ execute(set_nodefile, i, hosts=conf.HOSTS[i]) #clean up leftovers caused by exceptions with settings(warn_only=True): - execute(kill_process, hosts=conf.HOSTS) + execute(kill_process, hosts=conf.HOSTS) + def job_pserver(jobdir, pids=None): ''' @@ -124,9 +126,8 @@ execute(start_pserver, jobdir, pargs, hosts=conf.HOSTS) -def job_trainer(jobdir, - train_args_dict, - pids=None): + +def job_trainer(jobdir, train_args_dict, pids=None): ''' start paddle trainer ''' @@ -171,9 +172,8 @@ train_args += " --trainer_id=" + str(i) execute(start_trainer, jobdir, train_args, hosts=conf.HOSTS[i]) -def job_all(job_package, - jobdir=None, - train_args_dict=None): + +def job_all(job_package, jobdir=None, train_args_dict=None): ''' param job_package param train_args_dict @@ -183,41 +183,52 @@ jobdir = conf.ROOT_DIR + "/JOB" + timestamp job_prepare(jobdir, job_package) job_pserver(jobdir) - time.sleep(5) #wait until pservers completely start + time.sleep(5) #wait until pservers have completely started job_trainer(jobdir, train_args_dict) job_clean() + def job_clean(): ''' if starting the job fails inside paddle itself, the framework still appears to launch successfully because these processes are daemons, so job_clean can always clean up leftover job processes on ctrl+c. ''' + def signal_handler(signal, frame): ''' SIGINT handler ''' + def kill_process(): - run("ps aux \ | grep paddle_process_by_paddle \ | grep -v grep \ | awk '{print $2}' \ | xargs kill > /dev/null 2>&1") + with settings(warn_only=True): - execute(kill_process, hosts=conf.HOSTS) + execute(kill_process, hosts=conf.HOSTS) signal.signal(signal.SIGINT, signal_handler) signal.pause() + if __name__ == '__main__': - parser = argparse.ArgumentParser(prog="paddle.py", - description='simple tool for cluster training') - parser.add_argument('-j', '--job_workspace', - required=False, default=None, - help='job workspace') - parser.add_argument('-p', '--job_dispatch_package', - required=False, default=None, - help='job package for dispatching to all other nodes') + parser = argparse.ArgumentParser( + prog="paddle.py", description='simple tool for cluster training') + parser.add_argument( + '-j', + '--job_workspace', + required=False, + default=None, + help='job workspace') + parser.add_argument( + '-p', + '--job_dispatch_package', + required=False, + default=None, + help='job package for dispatching to all other nodes') args, train_args_list = parser.parse_known_args() train_args = refine_unknown_args(train_args_list) @@ -227,14 +238,10 @@ #if a workspace is assigned, there is no need to dispatch data, #so job_local_package should be None assert args.job_dispatch_package is None - job_all(None, - args.job_workspace, - train_args_dict) + job_all(None, args.job_workspace, train_args_dict) elif args.job_dispatch_package is not None: assert args.job_workspace is None assert os.path.isdir(args.job_dispatch_package) - job_all(args.job_dispatch_package, - None, - train_args_dict) + job_all(args.job_dispatch_package, None, train_args_dict) else: print "--job_workspace or --job_dispatch_package should be set" diff --git a/paddle/trainer/tests/__init__.py b/paddle/trainer/tests/__init__.py index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644 --- a/paddle/trainer/tests/__init__.py +++ b/paddle/trainer/tests/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS
diff --git a/paddle/trainer/tests/__init__.py b/paddle/trainer/tests/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/paddle/trainer/tests/__init__.py
+++ b/paddle/trainer/tests/__init__.py
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
diff --git a/paddle/trainer/tests/config_parser_test.py b/paddle/trainer/tests/config_parser_test.py
index 5ca874cec7914a20f79c2c7b1873c5bd04f60dca..c5ec315d6b01b0a5a3f73673e1756e9c06d685ba 100644
--- a/paddle/trainer/tests/config_parser_test.py
+++ b/paddle/trainer/tests/config_parser_test.py
@@ -17,6 +17,6 @@ from paddle.trainer.config_parser import parse_config_and_serialize
 if __name__ == '__main__':
     parse_config_and_serialize('trainer/tests/test_config.conf', '')
     parse_config_and_serialize(
-        'trainer/tests/sample_trainer_config.conf', 
+        'trainer/tests/sample_trainer_config.conf',
         'extension_module_name=paddle.trainer.config_parser_extension')
     parse_config_and_serialize('gserver/tests/pyDataProvider/trainer.conf', '')
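Side note on config_parser_test.py: parse_config_and_serialize(config_path, config_arg_str) takes a config path plus an argument string, where '' means no overrides, as the three calls above show. A sketch of driving it directly with the extension module enabled (run from the source root, which the relative paths assume):

    from paddle.trainer.config_parser import parse_config_and_serialize

    # same call the test makes, with the parser extension switched on
    parse_config_and_serialize(
        'trainer/tests/sample_trainer_config.conf',
        'extension_module_name=paddle.trainer.config_parser_extension')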
diff --git a/paddle/trainer/tests/gen_proto_data.py b/paddle/trainer/tests/gen_proto_data.py
index c818a94bee7c28b0245d28dd62353d46444cb592..a3dbc10c886e183582b44fee479d5ffb074193ef 100644
--- a/paddle/trainer/tests/gen_proto_data.py
+++ b/paddle/trainer/tests/gen_proto_data.py
@@ -21,8 +21,7 @@ import logging
 import pprint
 
 logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
-)
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
 logger = logging.getLogger('paddle')
 logger.setLevel(logging.INFO)
 
@@ -36,33 +35,32 @@ num_original_columns = 3
 
 # [[-1,0], [0,0]] means previous token at column 0 and current token at
 # column 0 are combined as one feature.
 patterns = [
-    [[-2,0]],
-    [[-1,0]],
-    [[0,0]],
-    [[1,0]],
-    [[2,0]],
-
-    [[-1,0], [0,0]],
-    [[0,0], [1,0]],
-
-    [[-2,1]],
-    [[-1,1]],
-    [[0,1]],
-    [[1,1]],
-    [[2,1]],
-    [[-2,1], [-1,1]],
-    [[-1,1], [0,1]],
-    [[0,1], [1,1]],
-    [[1,1], [2,1]],
-
-    [[-2,1], [-1,1], [0,1]],
-    [[-1,1], [0,1], [1,1]],
-    [[0,1], [1,1], [2,1]],
+    [[-2, 0]],
+    [[-1, 0]],
+    [[0, 0]],
+    [[1, 0]],
+    [[2, 0]],
+    [[-1, 0], [0, 0]],
+    [[0, 0], [1, 0]],
+    [[-2, 1]],
+    [[-1, 1]],
+    [[0, 1]],
+    [[1, 1]],
+    [[2, 1]],
+    [[-2, 1], [-1, 1]],
+    [[-1, 1], [0, 1]],
+    [[0, 1], [1, 1]],
+    [[1, 1], [2, 1]],
+    [[-2, 1], [-1, 1], [0, 1]],
+    [[-1, 1], [0, 1], [1, 1]],
+    [[0, 1], [1, 1], [2, 1]],
 ]
 
+
 def make_features(sequence):
     length = len(sequence)
     num_features = len(sequence[0])
+
     def get_features(pos):
         if pos < 0:
             return ['#B%s' % -pos] * num_features
@@ -72,9 +70,10 @@ def make_features(sequence):
 
     for i in xrange(length):
         for pattern in patterns:
-            fname = '/'.join([get_features(i+pos)[f] for pos, f in pattern])
+            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
             sequence[i].append(fname)
 
+
 '''
 Source file format:
 Each line is for one timestep. The features are separated by space.
@@ -87,6 +86,8 @@ i-th column.
 
 return a list of dict for each column
 '''
+
+
 def create_dictionaries(filename, cutoff, oov_policy):
     def add_to_dict(sequence, dicts):
         num_features = len(dicts)
@@ -118,7 +119,6 @@ def create_dictionaries(filename, cutoff, oov_policy):
             features = line.split(' ')
             sequence.append(features)
 
-
     for i in xrange(num_features):
         dct = dicts[i]
         n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
@@ -161,12 +161,9 @@ existed in dicts[i] will be assigned to id 0.
 if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
 in dicts[i].
 '''
-def gen_proto_file(
-    input_file,
-    dicts,
-    oov_policy,
-    output_file):
+
+
+def gen_proto_file(input_file, dicts, oov_policy, output_file):
     def write_sequence(out, sequence):
         num_features = len(dicts)
         is_beginning = True
 
@@ -213,8 +210,8 @@ def gen_proto_file(
     if patterns:
         slot_def = header.slot_defs.add()
         slot_def.type = DataFormat.SlotDef.VECTOR_SPARSE_NON_VALUE
-        slot_def.dim = sum([len(dicts[i])
-            for i in xrange(num_original_columns, len(dicts))])
+        slot_def.dim = sum(
+            [len(dicts[i]) for i in xrange(num_original_columns, len(dicts))])
     logger.info("feature_dim=%s" % slot_def.dim)
 
     for i in xrange(num_original_columns):
@@ -242,30 +239,31 @@ def gen_proto_file(
 
     logger.info("num_sequences=%s" % num_sequences)
 
+
 dict2 = {
-  'B-ADJP': 0,
-  'I-ADJP': 1,
-  'B-ADVP': 2,
-  'I-ADVP': 3,
-  'B-CONJP': 4,
-  'I-CONJP': 5,
-  'B-INTJ': 6,
-  'I-INTJ': 7,
-  'B-LST': 8,
-  'I-LST': 9,
-  'B-NP': 10,
-  'I-NP': 11,
-  'B-PP': 12,
-  'I-PP': 13,
-  'B-PRT': 14,
-  'I-PRT': 15,
-  'B-SBAR': 16,
-  'I-SBAR': 17,
-  'B-UCP': 18,
-  'I-UCP': 19,
-  'B-VP': 20,
-  'I-VP': 21,
-  'O': 22
+    'B-ADJP': 0,
+    'I-ADJP': 1,
+    'B-ADVP': 2,
+    'I-ADVP': 3,
+    'B-CONJP': 4,
+    'I-CONJP': 5,
+    'B-INTJ': 6,
+    'I-INTJ': 7,
+    'B-LST': 8,
+    'I-LST': 9,
+    'B-NP': 10,
+    'I-NP': 11,
+    'B-PP': 12,
+    'I-PP': 13,
+    'B-PRT': 14,
+    'I-PRT': 15,
+    'B-SBAR': 16,
+    'I-SBAR': 17,
+    'B-UCP': 18,
+    'I-UCP': 19,
+    'B-VP': 20,
+    'I-VP': 21,
+    'O': 22
 }
 
 if __name__ == '__main__':
@@ -273,16 +271,9 @@
     cutoff += [3] * len(patterns)
     oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
     oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
-    dicts = create_dictionaries(
-        'trainer/tests/train.txt', cutoff, oov_policy)
+    dicts = create_dictionaries('trainer/tests/train.txt', cutoff, oov_policy)
     dicts[2] = dict2
-    gen_proto_file(
-        'trainer/tests/train.txt',
-        dicts,
-        oov_policy,
-        'trainer/tests/train_proto.bin')
-    gen_proto_file(
-        'trainer/tests/test.txt',
-        dicts,
-        oov_policy,
-        'trainer/tests/test_proto.bin')
+    gen_proto_file('trainer/tests/train.txt', dicts, oov_policy,
+                   'trainer/tests/train_proto.bin')
+    gen_proto_file('trainer/tests/test.txt', dicts, oov_policy,
+                   'trainer/tests/test_proto.bin')
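Worked example for the pattern templates in gen_proto_data.py above: each pattern is a list of [offset, column] pairs, and make_features() joins the selected tokens with '/'. A standalone sketch of the [[-1, 0], [0, 0]] template (restricted to column 0; offsets before the sequence start fall back to '#B<n>' boundary markers, exactly as get_features() does for pos < 0):

    seq = [['The', 'DT'], ['dog', 'NN'], ['barks', 'VBZ']]

    def token(pos):
        # mirrors get_features(): positions before the start of the
        # sequence become boundary markers such as '#B1'
        return '#B%s' % -pos if pos < 0 else seq[pos][0]

    for i in xrange(len(seq)):
        print '/'.join([token(i + offset) for offset, column in [[-1, 0], [0, 0]]])
    # prints: #B1/The, The/dog, dog/barks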
diff --git a/paddle/trainer/tests/testPyDataWrapper.py b/paddle/trainer/tests/testPyDataWrapper.py
index 49bd760f4e20e2a12e5686b3193bdba2895612e4..4607bec24e1fec6f8b9996eb32fe991dbbe3ed79 100644
--- a/paddle/trainer/tests/testPyDataWrapper.py
+++ b/paddle/trainer/tests/testPyDataWrapper.py
@@ -21,7 +21,10 @@ import json
 import string
 
 
-@provider(slots=[SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1), IndexSlot(3)])
+@provider(slots=[
+    SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1),
+    IndexSlot(3)
+])
 def processNonSequenceData(obj, filename):
     with open(filename, "rb") as f:
         for line in f:
@@ -50,6 +53,7 @@ val_randomer = lambda: random.uniform(-1.0, 1.0)
 seq_count_randomer = lambda: random.randrange(1, SEQUENCE_LIMIT)
 str_count_randomer = lambda: random.randrange(1, STRING_LIMIT)
 
+
 class IDRandomer():  # A random generator, return unique id
     def __init__(self):
         self.id_set = set()
@@ -61,38 +65,57 @@ class IDRandomer():  # A random generator, return unique id
             return idx
         else:
             return self.__call__()
+
+
 # SparseValueSlot
 def sparse_value_creator(_):
     rand = IDRandomer()
     return [(rand(), val_randomer()) for _ in xrange(sparse_count_randomer())]
+
+
 sparse_value = map(sparse_value_creator, range(seq_count_randomer()))
 
+
 # DenseSlot
 def dense_creator(_):
     return [val_randomer() for _ in xrange(SPARSE_ID_LIMIT)]
+
+
 dense = map(dense_creator, range(seq_count_randomer()))
 
+
 # SparseNonValueSlot
 def sparse_creator(_):
     rand = IDRandomer()
     return [rand() for _ in xrange(sparse_count_randomer())]
+
+
 sparse_nonvalue = map(sparse_creator, range(seq_count_randomer()))
 
 # IndexSlot
 ids = [sparse_id_randomer() for _ in range(seq_count_randomer())]
 
+
 # StringSlot
-def random_str(size = 8, chars=string.ascii_letters + string.digits):
+def random_str(size=8, chars=string.ascii_letters + string.digits):
    return ''.join(random.choice(chars) for _ in range(size))
+
+
 strs = [random_str(str_count_randomer()) for _ in range(seq_count_randomer())]
 
+
 def processSeqAndGenerateDataInit(obj, *args, **kwargs):
     obj.json_filename = kwargs.get("load_data_args", "test_data.json")
 
-@provider(slots=[SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT),
-                 SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT),
-                 StringSlot(SPARSE_ID_LIMIT)],
-          use_seq=True, init_hook=processSeqAndGenerateDataInit)
+
+@provider(
+    slots=[
+        SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT),
+        SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT),
+        StringSlot(SPARSE_ID_LIMIT)
+    ],
+    use_seq=True,
+    init_hook=processSeqAndGenerateDataInit)
 def processSeqAndGenerateData(obj, name):
     retv = [sparse_value, dense, sparse_nonvalue, ids, strs]
     # Write to protoseq.
@@ -104,10 +127,15 @@ def processSeqAndGenerateData(obj, name):
 def processSubSeqAndGenerateDataInit(obj, *args, **kwargs):
     obj.json_filename = kwargs.get("load_data_args", "test_data.json")
 
-@provider(slots=[SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT),
-                 SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT),
-                 StringSlot(SPARSE_ID_LIMIT)],
-          use_seq=True, init_hook=processSubSeqAndGenerateDataInit)
+
+@provider(
+    slots=[
+        SparseValueSlot(SPARSE_ID_LIMIT), DenseSlot(SPARSE_ID_LIMIT),
+        SparseNonValueSlot(SPARSE_ID_LIMIT), IndexSlot(SPARSE_ID_LIMIT),
+        StringSlot(SPARSE_ID_LIMIT)
+    ],
+    use_seq=True,
+    init_hook=processSubSeqAndGenerateDataInit)
 def processSubSeqAndGenerateData(obj, name):
     retv_json = [sparse_value, dense, sparse_nonvalue, ids, strs]
     retv_wrapper = [[sparse_value], [dense], [sparse_nonvalue], [ids], [strs]]
@@ -116,6 +144,7 @@ def processSubSeqAndGenerateData(obj, name):
         json.dump(retv_json, f)
     yield retv_wrapper
 
+
 if __name__ == "__main__":
     pvd = processNonSequenceData("test.txt")
     print pvd.getNextBatch(100)
diff --git a/paddle/utils/enable_virtualenv.py b/paddle/utils/enable_virtualenv.py
index 99d822a4145cca3f5ae35c4cf144210f35460827..ccfaa7c147b2ce25cb6007aa04cfc33961b7e10b 100644
--- a/paddle/utils/enable_virtualenv.py
+++ b/paddle/utils/enable_virtualenv.py
@@ -1,10 +1,12 @@
 import os
 
+
 def __activate_virtual_env__():
-  __path__ = os.getenv('VIRTUAL_ENV')
-  if __path__ is None:
-    return
-  __script__ = os.path.join(__path__, 'bin', 'activate_this.py')
-  execfile(__script__, {'__file__': __script__})
+    __path__ = os.getenv('VIRTUAL_ENV')
+    if __path__ is None:
+        return
+    __script__ = os.path.join(__path__, 'bin', 'activate_this.py')
+    execfile(__script__, {'__file__': __script__})
+
 
 __activate_virtual_env__()
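Usage note for enable_virtualenv.py above: activate_this.py is the standard script virtualenv places in <env>/bin, and execfile-ing it re-points sys.prefix and sys.path at that environment from inside an already-running interpreter, so importing the module is enough to opt in (the environment path below is an assumed example):

    # no-op when VIRTUAL_ENV is unset; otherwise activates the environment
    #
    #   VIRTUAL_ENV=/opt/py27env python -c "import paddle.utils.enable_virtualenv"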