From a2cec420c82575913a155b922122291eefe3b622 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Tue, 28 Feb 2017 20:28:05 +0000 Subject: [PATCH] In response to comments from Wen-bo --- python/paddle/v2/dataset/imikolov.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index e6d0357f97f..d9518dd27e9 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -3,7 +3,6 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ """ import paddle.v2.dataset.common import tarfile -import collections __all__ = ['train', 'test'] @@ -40,10 +39,8 @@ def build_dict(train_filename, test_filename): testf = tf.extractfile(test_filename) word_freq = word_count(testf, word_count(trainf)) - STOPWORD_FREQ = 3000 TYPO_FREQ = 50 - word_freq = filter(lambda x: x[1] > TYPO_FREQ and x[1] < STOPWORD_FREQ, - word_freq.items()) + word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) words, _ = list(zip(*dictionary)) -- GitLab