Merge pull request #589 from lcy-seso/fix_text_classification

Fix the bug where the shuffle pool (buffer) is too small.

Merge pull request #589 from lcy-seso/fix_text_classification
Fix the bug where the shuffle pool (buffer) is too small.
56f7614b · Cao Ying · GitHub · e6e39202 · f683f15f · 56f7614b
隐藏空白更改
内联并排

Showing 1 changed file with 3 additions and 5 deletions

text_classification/train.py text_classification/train.py +3 -5

未找到文件。
--- a/text_classification/train.py
+++ b/text_classification/train.py
@@ -46,7 +46,7 @@ def train(topology,
        word_dict = paddle.dataset.imdb.word_dict()
        train_reader = paddle.batch(
            paddle.reader.shuffle(
-                lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=1000),
+                lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=51200),
            batch_size=100)
        test_reader = paddle.batch(
            lambda: paddle.dataset.imdb.test(word_dict)(), batch_size=100)
@@ -83,16 +83,14 @@ def train(topology,
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                reader.train_reader(train_data_dir, word_dict, lbl_dict),
-                buf_size=1000),
+                buf_size=51200),
            batch_size=batch_size)

        if test_data_dir is not None:
            # here, because training and testing data share a same format,
            # we still use the reader.train_reader to read the testing data.
            test_reader = paddle.batch(
-                paddle.reader.shuffle(
-                    reader.train_reader(test_data_dir, word_dict, lbl_dict),
-                    buf_size=1000),
+                reader.train_reader(test_data_dir, word_dict, lbl_dict),
                batch_size=batch_size)
        else:
            test_reader = None