From 353b7ab030e90e920343d3adc170645969bb0a74 Mon Sep 17 00:00:00 2001
From: peterzhang2029 <zhangchao41@baidu.com>
Date: Wed, 31 Jan 2018 19:17:11 +0800
Subject: [PATCH] Simplify load_vocab with enumerate and raise shuffle buf_size to 51200

---
 fluid/text_classification/train.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py
index 5980f71c..98f63f08 100644
--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
@@ -40,10 +40,8 @@ def to_lodtensor(data, place):
 def load_vocab(filename):
     vocab = {}
     with open(filename) as f:
-        wid = 0
-        for line in f:
-            vocab[line.strip()] = wid
-            wid += 1
+        for idx, line in enumerate(f):
+            vocab[line.strip()] = idx
     return vocab
 
 
@@ -101,13 +99,13 @@ def main(dict_path):
     # The training data set.
     train_reader = paddle.batch(
         paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=5000),
+            paddle.dataset.imdb.train(word_dict), buf_size=51200),
         batch_size=conf.batch_size)
 
     # The testing data set.
     test_reader = paddle.batch(
         paddle.reader.shuffle(
-            paddle.dataset.imdb.test(word_dict), buf_size=5000),
+            paddle.dataset.imdb.test(word_dict), buf_size=51200),
         batch_size=conf.batch_size)
 
     if conf.use_gpu:
-- 
GitLab