imdb.word_dict: replace the hard-coded 150 passed as the second argument of
build_dict with a new `cutoff` parameter. The default stays 150, so existing
zero-argument callers get the same call as before.

diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 37c4296f9bcea7e16daa46f778934331513c30c4..00c2a3b9928d1ca5f3e8cd5e87ba7ad4108e9dad 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -124,7 +124,7 @@ def test(word_idx):
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx)
 
 
-def word_dict():
+def word_dict(cutoff=150):
     """
     Build a word dictionary from the corpus.
 
@@ -132,7 +132,7 @@ def word_dict():
     :rtype: dict
     """
     return build_dict(
-        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
+        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), cutoff)
 
 
 def fetch():