fix code style

339c4cdd · gmcather · 519d726b · 339c4cdd · 339c4cdd · 339c4cdd
5 changed files
--- a/fluid/text_classification/infer.py
+++ b/fluid/text_classification/infer.py
-"""
-For http://wiki.baidu.com/display/LegoNet/Text+Classification
-"""
 import paddle.fluid as fluid
 import paddle.v2 as paddle
 import numpy as np
@@ -11,9 +8,7 @@ import contextlib
 import utils
-def infer(test_reader,
+def infer(test_reader, use_cuda, model_path=None):
-          use_cuda, 
-          model_path=None):
    """
    inference function
    """
@@ -33,7 +28,7 @@ def infer(test_reader,
        total_count = 0
        for data in test_reader():
            acc = exe.run(inference_program,
-                    feed = utils.data2tensor(data, place),
+                          feed=utils.data2tensor(data, place),
                          fetch_list=fetch_targets,
                          return_numpy=True)
            total_acc += acc[0] * len(data)
@@ -45,12 +40,10 @@ def infer(test_reader,
 if __name__ == "__main__":
    word_dict, train_reader, test_reader = utils.prepare_data(
-            "imdb", self_dict = False,
+        "imdb", self_dict=False, batch_size=128, buf_size=50000)
-            batch_size = 128, buf_size = 50000)
    model_path = sys.argv[1]
+    # Evaluate each per-epoch model directory epoch0 .. epoch29 under model_path.
    for i in range(30):
        epoch_path = model_path + "/" + "epoch" + str(i)
-        infer(test_reader, use_cuda=False,
+        infer(test_reader, use_cuda=False, model_path=epoch_path)
-                model_path=epoch_path)
--- a/fluid/text_classification/model.py
+++ b/fluid/text_classification/model.py
+"""
+For http://wiki.baidu.com/display/LegoNet/Text+Classification
+"""
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+import numpy as np
+import sys
+import time
+import unittest
+import contextlib
+import utils
+def bow_net(data,
+            label,
+            dict_dim,
+            emb_dim=128,
+            hid_dim=128,
+            hid_dim2=96,
+            class_dim=2):
+    """
+    Bag-of-words net: embedding -> sum sequence pool -> tanh ->
+    two tanh fc layers -> softmax fc layer.
+    Returns (mean cross-entropy cost, accuracy, prediction).
+    """
+    embedded = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    pooled = fluid.layers.sequence_pool(input=embedded, pool_type='sum')
+    pooled_tanh = fluid.layers.tanh(pooled)
+    hidden_1 = fluid.layers.fc(input=pooled_tanh, size=hid_dim, act="tanh")
+    hidden_2 = fluid.layers.fc(input=hidden_1, size=hid_dim2, act="tanh")
+    prediction = fluid.layers.fc(
+        input=[hidden_2], size=class_dim, act="softmax")
+    # Loss and metric are computed against the provided label.
+    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    mean_loss = fluid.layers.mean(x=loss)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return mean_loss, accuracy, prediction
+def conv_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2,
+             win_size=3):
+    """
+    Convolution net: embedding -> sequence_conv_pool (tanh, max pool) ->
+    fc layer without activation -> softmax fc layer.
+    Returns (mean cross-entropy cost, accuracy, prediction).
+    """
+    embedded = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    conv_pooled = fluid.nets.sequence_conv_pool(
+        input=embedded,
+        num_filters=hid_dim,
+        filter_size=win_size,
+        act="tanh",
+        pool_type="max")
+    hidden = fluid.layers.fc(input=[conv_pooled], size=hid_dim2)
+    prediction = fluid.layers.fc(
+        input=[hidden], size=class_dim, act="softmax")
+    # Loss and metric are computed against the provided label.
+    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    mean_loss = fluid.layers.mean(x=loss)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return mean_loss, accuracy, prediction
+def lstm_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2):
+    """
+    LSTM net: embedding -> tanh fc (hid_dim * 4) -> forward dynamic_lstm ->
+    max sequence pool -> tanh -> tanh fc -> softmax fc layer.
+    Returns (mean cross-entropy cost, accuracy, prediction).
+    """
+    embedded = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    projected = fluid.layers.fc(input=embedded, size=hid_dim * 4, act='tanh')
+    # Only the first output of dynamic_lstm is used below.
+    lstm_out, _ = fluid.layers.dynamic_lstm(
+        input=projected, size=hid_dim * 4, is_reverse=False)
+    pooled = fluid.layers.sequence_pool(input=lstm_out, pool_type='max')
+    pooled_tanh = fluid.layers.tanh(pooled)
+    hidden = fluid.layers.fc(input=pooled_tanh, size=hid_dim2, act='tanh')
+    prediction = fluid.layers.fc(input=hidden, size=class_dim, act='softmax')
+    # Loss and metric are computed against the provided label.
+    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    mean_loss = fluid.layers.mean(x=loss)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return mean_loss, accuracy, prediction
+def gru_net(data,
+            label,
+            dict_dim,
+            emb_dim=128,
+            hid_dim=128,
+            hid_dim2=96,
+            class_dim=2):
+    """
+    GRU net: embedding -> fc (hid_dim * 3, no activation) -> forward
+    dynamic_gru -> max sequence pool -> tanh -> tanh fc -> softmax fc layer.
+    Returns (mean cross-entropy cost, accuracy, prediction).
+    """
+    embedded = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    projected = fluid.layers.fc(input=embedded, size=hid_dim * 3)
+    gru_out = fluid.layers.dynamic_gru(
+        input=projected, size=hid_dim, is_reverse=False)
+    pooled = fluid.layers.sequence_pool(input=gru_out, pool_type='max')
+    pooled_tanh = fluid.layers.tanh(pooled)
+    hidden = fluid.layers.fc(input=pooled_tanh, size=hid_dim2, act='tanh')
+    prediction = fluid.layers.fc(input=hidden, size=class_dim, act='softmax')
+    # Loss and metric are computed against the provided label.
+    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    mean_loss = fluid.layers.mean(x=loss)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return mean_loss, accuracy, prediction
+def train(train_reader,
+          word_dict,
+          network,
+          use_cuda,
+          parallel,
+          save_dirname,
+          lr=0.2,
+          batch_size=128,
+          pass_num=30):
+    """
+    Build the given network, train it and save an inference model.
+    Args:
+        train_reader: callable returning an iterable of training batches.
+        word_dict: vocabulary; only its length is used (as dict_dim).
+        network: callable (data, label, dict_dim) -> (cost, acc, prediction).
+        use_cuda: run on CUDAPlace(0) when true, otherwise on CPUPlace.
+        parallel: when true, run the network inside ParallelDo on 2 places.
+        save_dirname: directory handed to fluid.io.save_inference_model.
+        lr: learning rate of the Adagrad optimizer.
+        batch_size: not used in this function; kept for caller compatibility.
+        pass_num: number of passes over train_reader.
+    """
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    if not parallel:
+        cost, acc, prediction = network(data, label, len(word_dict))
+    else:
+        places = fluid.layers.get_places(device_count=2)
+        pd = fluid.layers.ParallelDo(places)
+        with pd.do():
+            cost, acc, prediction = network(
+                pd.read_input(data), pd.read_input(label), len(word_dict))
+            pd.write_output(cost)
+            pd.write_output(acc)
+        # Average the per-place outputs into a single cost / accuracy.
+        cost, acc = pd()
+        cost = fluid.layers.mean(cost)
+        acc = fluid.layers.mean(acc)
+    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    optimizer.minimize(cost)
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+    exe.run(fluid.default_startup_program())
+    # range (not xrange) keeps the loop working on both Python 2 and 3.
+    for pass_id in range(pass_num):
+        avg_acc_list = []
+        # The loop variable is named batch so it does not shadow the
+        # `data` input layer defined above.
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
+                                              feed=feeder.feed(batch),
+                                              fetch_list=[cost, acc])
+            avg_acc_list.append(avg_acc_np)
+        print("pass_id: %d, avg_acc: %f" % (pass_id, np.mean(avg_acc_list)))
+    # save_model
+    fluid.io.save_inference_model(save_dirname, ["words", "label"], acc, exe)
+def test(test_reader, use_cuda, save_dirname=None):
+    """
+    Load the inference model stored in save_dirname, run it on every batch
+    from test_reader and print the batch-size-weighted accuracy.
+    Prints a message and returns early when save_dirname is None.
+    """
+    if save_dirname is None:
+        print(str(save_dirname) + " cannot be found")
+        return
+    if use_cuda:
+        place = fluid.CUDAPlace(0)
+    else:
+        place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        loaded = fluid.io.load_inference_model(save_dirname, exe)
+        [inference_program, feed_target_names, fetch_targets] = loaded
+        acc_sum = 0.0
+        sample_count = 0
+        for batch in test_reader():
+            fetched = exe.run(inference_program,
+                              feed=utils.data2tensor(batch, place),
+                              fetch_list=fetch_targets,
+                              return_numpy=True)
+            # Weight each batch accuracy by the number of samples in it.
+            batch_len = len(batch)
+            acc_sum += fetched[0] * batch_len
+            sample_count += batch_len
+        print("test_acc: %f" % (acc_sum / sample_count))
+def main(network,
+         dataset,
+         model_conf,
+         use_cuda,
+         parallel,
+         batch_size,
+         lr=0.2,
+         pass_num=30):
+    """
+    Prepare the readers for `dataset`, train `network`, save the model to
+    `model_conf` and then evaluate the saved model on the test reader.
+    """
+    prepared = utils.prepare_data(
+        dataset, self_dict=False, batch_size=batch_size, buf_size=50000)
+    word_dict, train_reader, test_reader = prepared
+    train(
+        train_reader,
+        word_dict,
+        network,
+        use_cuda=use_cuda,
+        parallel=parallel,
+        save_dirname=model_conf,
+        lr=lr,
+        pass_num=pass_num,
+        batch_size=batch_size)
+    test(test_reader, use_cuda=use_cuda, save_dirname=model_conf)
+class TestModel(unittest.TestCase):
+    """
+    End-to-end cases: each one calls main() for one network with a given
+    use_cuda / parallel combination. Every case is currently skipped.
+    """
+    @contextlib.contextmanager
+    def new_program_scope(self):
+        """
+        Run the with-body under a fresh scope and fresh main/startup programs
+        """
+        main_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        fresh_scope = fluid.core.Scope()
+        with fluid.scope_guard(fresh_scope):
+            with fluid.program_guard(main_prog, startup_prog):
+                yield
+    def _run_case(self, network, model_conf, use_cuda, parallel, *extra):
+        """
+        Call main() on "tiny_imdb" with batch size 128 inside a fresh
+        program scope; extra positional args are forwarded to main()
+        """
+        with self.new_program_scope():
+            main(network, "tiny_imdb", model_conf, use_cuda, parallel, 128,
+                 *extra)
+    @unittest.skip(reason='success, total time:14.19s')
+    def test_bow_cpu(self):
+        """bow_net, use_cuda=False, parallel=False"""
+        self._run_case(bow_net, "bow.cpu", False, False)
+    @unittest.skip(reason='success, total time:7.62s')
+    def test_bow_gpu(self):
+        """bow_net, use_cuda=True, parallel=False"""
+        self._run_case(bow_net, "bow.gpu", True, False)
+    @unittest.skip(reason='success, total time:15.02s')
+    def test_bow_cpu_mthread(self):
+        """bow_net, use_cuda=False, parallel=True"""
+        self._run_case(bow_net, "bow.cpu_mthread", False, True)
+    @unittest.skip(reason='success, total time:9.45s')
+    def test_bow_gpu_mthread(self):
+        """bow_net, use_cuda=True, parallel=True"""
+        self._run_case(bow_net, "bow.gpu_mthread", True, True)
+    @unittest.skip(reason='success, total time:85.0s')
+    def test_cnn_cpu(self):
+        """conv_net, use_cuda=False, parallel=False"""
+        self._run_case(conv_net, "conv.cpu", False, False)
+    @unittest.skip(reason='success, total time:12.0s')
+    def test_cnn_gpu(self):
+        """conv_net, use_cuda=True, parallel=False"""
+        self._run_case(conv_net, "conv.gpu", True, False)
+    @unittest.skip(reason='success, total time:53.0s')
+    def test_cnn_cpu_mthread(self):
+        """conv_net, use_cuda=False, parallel=True"""
+        self._run_case(conv_net, "conv.cpu_mthread", False, True)
+    @unittest.skip(reason='success, total time:10.9s')
+    def test_cnn_gpu_mthread(self):
+        """conv_net, use_cuda=True, parallel=True"""
+        self._run_case(conv_net, "conv.gpu_mthread", True, True)
+    @unittest.skip(reason='success, total time:232.5s')
+    def test_lstm_cpu(self):
+        """lstm_net, use_cuda=False, parallel=False"""
+        self._run_case(lstm_net, "lstm.cpu", False, False)
+    @unittest.skip(reason='success, total time:26.5s')
+    def test_lstm_gpu(self):
+        """lstm_net, use_cuda=True, parallel=False"""
+        self._run_case(lstm_net, "lstm.gpu", True, False)
+    @unittest.skip(reason='success, total time:135.0s')
+    def test_lstm_cpu_mthread(self):
+        """lstm_net, use_cuda=False, parallel=True"""
+        self._run_case(lstm_net, "lstm.cpu_mthread", False, True)
+    @unittest.skip(reason='success, total time:26.23s')
+    def test_lstm_gpu_mthread(self):
+        """lstm_net, use_cuda=True, parallel=True"""
+        self._run_case(lstm_net, "lstm.gpu_mthread", True, True)
+    @unittest.skip(reason='success, total time:163.0s')
+    def test_gru_cpu(self):
+        """gru_net, use_cuda=False, parallel=False"""
+        self._run_case(gru_net, "gru.cpu", False, False)
+    @unittest.skip(reason='success, total time:28.88s')
+    def test_gru_gpu(self):
+        """gru_net, use_cuda=True, parallel=False, lr=0.02, pass_num=30"""
+        self._run_case(gru_net, "gru.gpu", True, False, 0.02, 30)
+    @unittest.skip(reason='success, total time:97.15s')
+    def test_gru_cpu_mthread(self):
+        """gru_net, use_cuda=False, parallel=True"""
+        self._run_case(gru_net, "gru.cpu_mthread", False, True)
+    @unittest.skip(reason='success, total time:26.05s')
+    def test_gru_gpu_mthread(self):
+        """gru_net, use_cuda=True, parallel=True"""
+        self._run_case(gru_net, "gru.gpu_mthread", True, True)
+# Script entry point: hand control to the unittest runner.
+if __name__ == "__main__":
+    unittest.main()
--- a/fluid/text_classification/nets.py
+++ b/fluid/text_classification/nets.py
-"""
-For http://wiki.baidu.com/display/LegoNet/Text+Classification
-"""
 import paddle.fluid as fluid
 import paddle.v2 as paddle
 import numpy as np
@@ -8,7 +5,8 @@ import sys
 import time
-def bow_net(data, label,
+def bow_net(data,
+            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
@@ -17,19 +15,12 @@ def bow_net(data, label,
    """
    bow net
    """
-    emb = fluid.layers.embedding(input=data, 
+    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
-                                size=[dict_dim, emb_dim])
+    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
-    bow = fluid.layers.sequence_pool(
-        input=emb,
-        pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
-    fc_1 = fluid.layers.fc(input=bow_tanh,
+    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
-                        size=hid_dim, act = "tanh")
+    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
-    fc_2 = fluid.layers.fc(input=fc_1,
+    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
-                        size=hid_dim2, act = "tanh")
-    prediction = fluid.layers.fc(input=[fc_2],
-                             size=class_dim,
-                             act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)
@@ -37,7 +28,8 @@ def bow_net(data, label,
    return avg_cost, acc, prediction
-def cnn_net(data, label,
+def cnn_net(data,
+            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
@@ -47,8 +39,7 @@ def cnn_net(data, label,
    """
    conv net
    """
-    emb = fluid.layers.embedding(input=data, 
+    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
-                                size=[dict_dim, emb_dim])
    conv_3 = fluid.nets.sequence_conv_pool(input=emb,
        num_filters=hid_dim,
@@ -56,12 +47,9 @@ def cnn_net(data, label,
        act="tanh",
        pool_type="max")
-    fc_1 = fluid.layers.fc(input=[conv_3],
+    fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
-                                    size=hid_dim2)
-    prediction = fluid.layers.fc(input=[fc_1],
+    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
-                             size=class_dim,
-                             act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=prediction, label=label)
@@ -69,7 +57,8 @@ def cnn_net(data, label,
    return avg_cost, acc, prediction
-def lstm_net(data, label,
+def lstm_net(data,
+             label,
             dict_dim,
             emb_dim=128,
             hid_dim=128,
@@ -79,29 +68,22 @@ def lstm_net(data, label,
    """
    lstm net
    """
-    emb = fluid.layers.embedding(input=data, 
+    emb = fluid.layers.embedding(
+        input=data, 
        size=[dict_dim, emb_dim],
        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
-    fc0 = fluid.layers.fc(input=emb, 
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4, act='tanh')
-                        size=hid_dim * 4, 
-                        act='tanh')
-    lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, 
+    lstm_h, c = fluid.layers.dynamic_lstm(
-                        size=hid_dim * 4, 
+        input=fc0, size=hid_dim * 4, is_reverse=False)
-                        is_reverse=False)
-    lstm_max = fluid.layers.sequence_pool(input=lstm_h, 
+    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
-                        pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
-    fc1 = fluid.layers.fc(input=lstm_max_tanh, 
+    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
-                        size=hid_dim2, 
-                        act='tanh')
-    prediction = fluid.layers.fc(input=fc1, 
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
-                        size=class_dim, 
-                        act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
@@ -110,7 +92,8 @@ def lstm_net(data, label,
    return avg_cost, acc, prediction
-def gru_net(data, label,
+def gru_net(data,
+            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
@@ -120,28 +103,17 @@ def gru_net(data, label,
    """
    gru net
    """
-    emb = fluid.layers.embedding(input=data, 
+    emb = fluid.layers.embedding(
+        input=data,
        size=[dict_dim, emb_dim],
        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
-    fc0 = fluid.layers.fc(input=emb, 
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
-                        size=hid_dim * 3)
+    gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
+    gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
-    gru_h = fluid.layers.dynamic_gru(input=fc0, 
-                        size=hid_dim, 
-                        is_reverse=False)
-    gru_max = fluid.layers.sequence_pool(input=gru_h, 
-                        pool_type='max')
    gru_max_tanh = fluid.layers.tanh(gru_max)
+    fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
-    fc1 = fluid.layers.fc(input=gru_max_tanh, 
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
-                        size=hid_dim2, 
-                        act='tanh')
-    prediction = fluid.layers.fc(input=fc1, 
-                        size=class_dim, 
-                        act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)

--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
-"""
-For http://wiki.baidu.com/display/LegoNet/Text+Classification
-"""
 import paddle.fluid as fluid
 import paddle.v2 as paddle
 import numpy as np
@@ -27,27 +24,18 @@ def train(train_reader,
    train network
    """
    data = fluid.layers.data(
-        name="words", 
+        name="words", shape=[1], dtype="int64", lod_level=1)
-        shape=[1], 
-        dtype="int64", 
-        lod_level=1)
-    label = fluid.layers.data(
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
-        name="label", 
-        shape=[1], 
-        dtype="int64")
    if not parallel:
-        cost, acc, prediction = network(
+        cost, acc, prediction = network(data, label, len(word_dict))
-            data, label, len(word_dict))
    else:
-        places = fluid.layers.get_places(device_count = 2)
+        places = fluid.layers.get_places(device_count=2)
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            cost, acc, prediction = network(
-            pd.read_input(data), 
+                pd.read_input(data), pd.read_input(label), len(word_dict))
-            pd.read_input(label), 
-            len(word_dict))
            pd.write_output(cost)
            pd.write_output(acc)
@@ -68,8 +56,7 @@ def train(train_reader,
        data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
        for data in train_reader():
            avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
-                                        feed=feeder.feed(data),
+                feed=feeder.feed(data), fetch_list=[cost, acc])
-                                        fetch_list=[cost, acc])
            data_size = len(data)
            total_acc += data_size * avg_acc_np
            total_cost += data_size * avg_cost_np
@@ -77,36 +64,61 @@ def train(train_reader,
        avg_cost = total_cost / data_count
        avg_acc = total_acc / data_count
-        print("pass_id: %d, avg_acc: %f, avg_cost: %f" % (pass_id, avg_acc, avg_cost))
+        print("pass_id: %d, avg_acc: %f, avg_cost: %f" % 
+              (pass_id, avg_acc, avg_cost))
        epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
-        fluid.io.save_inference_model(
+        fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe)
-                epoch_model, 
-                ["words", "label"],
-                acc, exe)
 def train_net():
+    """
+    Train the network named by sys.argv[1] ("bow", "cnn", "lstm" or "gru")
+    on the "imdb" data; exit with status 1 for any other name.
+    """
    word_dict, train_reader, test_reader = utils.prepare_data(
-            "imdb", self_dict = False,
+        "imdb", self_dict=False, batch_size=128, buf_size=50000)
-            batch_size = 128, buf_size = 50000)
    if sys.argv[1] == "bow":
-        train(train_reader, word_dict, bow_net, use_cuda=False,
+        train(
-                parallel=False, save_dirname="bow_model", lr=0.002,
+            train_reader,
-                pass_num=30, batch_size=128)
+            word_dict,
+            bow_net,
+            use_cuda=False,
+            parallel=False,
+            save_dirname="bow_model",
+            lr=0.002,
+            pass_num=30,
+            batch_size=128)
    elif sys.argv[1] == "cnn":
-        train(train_reader, word_dict, cnn_net, use_cuda=True,
+        train(
-                parallel=False, save_dirname="cnn_model", lr=0.01,
+            train_reader,
-                pass_num=30, batch_size=4)
+            word_dict,
+            cnn_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="cnn_model",
+            lr=0.01,
+            pass_num=30,
+            batch_size=4)
    elif sys.argv[1] == "lstm":
-        train(train_reader, word_dict, lstm_net, use_cuda=True,
+        train(
-                parallel=False, save_dirname="lstm_model", lr=0.05,
+            train_reader,
-                pass_num=30, batch_size=4)
+            word_dict,
+            lstm_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="lstm_model",
+            lr=0.05,
+            pass_num=30,
+            batch_size=4)
    elif sys.argv[1] == "gru":
-        train(train_reader, word_dict, bow_net, use_cuda=True,
+        train(
-                parallel=False, save_dirname="gru_model", lr=0.05,
+            train_reader,
-                pass_num=30, batch_size=128)
+            word_dict,
+            gru_net,
+            use_cuda=True,
+            parallel=False,
+            save_dirname="gru_model",
+            lr=0.05,
+            pass_num=30,
+            batch_size=128)
    else:
        print("network name cannot be found!")
        sys.exit(1)    

--- a/fluid/text_classification/utils.py
+++ b/fluid/text_classification/utils.py
-"""
-For http://wiki.baidu.com/display/LegoNet/Text+Classification
-"""
 import paddle.fluid as fluid
 import paddle.v2 as paddle
 import numpy as np
@@ -9,6 +6,7 @@ import time
 import light_imdb
 import tiny_imdb
 def to_lodtensor(data, place):
    """
    convert to LODtensor
@@ -45,7 +43,7 @@ def data2tensor(data, place):
    """
    data2tensor
    """
-    input_seq = to_lodtensor(map(lambda x:x[0], data), place)
+    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
    y_data = y_data.reshape([-1, 1])
    return {"words": input_seq, "label": y_data}
@@ -73,8 +71,7 @@ def prepare_data(data_type="imdb",
    if data_type == "imdb":
        train_reader = paddle.batch(
            paddle.reader.shuffle(
-                paddle.dataset.imdb.train(word_dict), 
+                paddle.dataset.imdb.train(word_dict), buf_size = buf_size),
-                buf_size = buf_size),
            batch_size = batch_size)
        test_reader = paddle.batch(
@@ -86,27 +83,23 @@ def prepare_data(data_type="imdb",
    elif data_type == "light_imdb":
        train_reader = paddle.batch(
            paddle.reader.shuffle(
-                light_imdb.train(word_dict), 
+                light_imdb.train(word_dict), buf_size = buf_size),
-                buf_size = buf_size),
            batch_size = batch_size)
        test_reader = paddle.batch(
            paddle.reader.shuffle(
-                light_imdb.test(word_dict), 
+                light_imdb.test(word_dict), buf_size = buf_size),
-                buf_size = buf_size),
            batch_size = batch_size)
    elif data_type == "tiny_imdb":
        train_reader = paddle.batch(
            paddle.reader.shuffle(
-                tiny_imdb.train(word_dict), 
+                tiny_imdb.train(word_dict), buf_size = buf_size),
-                buf_size = buf_size),
            batch_size = batch_size)
        test_reader = paddle.batch(
            paddle.reader.shuffle(
-                tiny_imdb.test(word_dict), 
+                tiny_imdb.test(word_dict), buf_size = buf_size),
-                buf_size = buf_size),
            batch_size = batch_size)
    else:
        raise RuntimeError("no such dataset")