In response to comments from Wen-bo

a2cec420 · Yi Wang · da6af591 · a2cec420
隐藏空白更改
内联并排

Showing 1 changed file with 1 addition and 4 deletions

python/paddle/v2/dataset/imikolov.py +1 -4

未找到文件。
--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -3,7 +3,6 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
 """
 import paddle.v2.dataset.common
 import tarfile
-import collections

 __all__ = ['train', 'test']

@@ -40,10 +39,8 @@ def build_dict(train_filename, test_filename):
        testf = tf.extractfile(test_filename)
        word_freq = word_count(testf, word_count(trainf))

-        STOPWORD_FREQ = 3000
        TYPO_FREQ = 50
-        word_freq = filter(lambda x: x[1] > TYPO_FREQ and x[1] < STOPWORD_FREQ,
-                           word_freq.items())
+        word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())

        dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
        words, _ = list(zip(*dictionary))