From 353b7ab030e90e920343d3adc170645969bb0a74 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Wed, 31 Jan 2018 19:17:11 +0800 Subject: [PATCH] update the dictionary generator and buffer size --- fluid/text_classification/train.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py index 5980f71c..98f63f08 100644 --- a/fluid/text_classification/train.py +++ b/fluid/text_classification/train.py @@ -40,10 +40,8 @@ def to_lodtensor(data, place): def load_vocab(filename): vocab = {} with open(filename) as f: - wid = 0 - for line in f: - vocab[line.strip()] = wid - wid += 1 + for idx, line in enumerate(f): + vocab[line.strip()] = idx return vocab @@ -101,13 +99,13 @@ def main(dict_path): # The training data set. train_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=5000), + paddle.dataset.imdb.train(word_dict), buf_size=51200), batch_size=conf.batch_size) # The testing data set. test_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.imdb.test(word_dict), buf_size=5000), + paddle.dataset.imdb.test(word_dict), buf_size=51200), batch_size=conf.batch_size) if conf.use_gpu: -- GitLab