diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index 191d9ecfb127c1851a392bc9ec83734d630d0ac4..fab8a68b0beee8b813bee2a05047e2da526a9c9b 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -23,7 +23,6 @@ import paddle.v2.dataset import cPickle import glob import cPickle as pickle -import random __all__ = [ 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', @@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix): indx_f = 0 def write_data(indx_f, lines): - random.shuffle(lines) filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f) writer = recordio.writer(filename) for l in lines: diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 21ed7f7a5ce279f5bc65e5b008f14a1b0ff97343..37c4296f9bcea7e16daa46f778934331513c30c4 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -25,7 +25,6 @@ import collections import tarfile import re import string -import random __all__ = ['build_dict', 'train', 'test', 'convert'] @@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx): load(pos_pattern, INS, 0) load(neg_pattern, INS, 1) - random.shuffle(INS) def reader(): for doc, label in INS: diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index b705c9109b2b6769c9fafa9241db5d81c682f9e3..d3b3dd524c34be660c5f2d4fc5ce2fa0420efbc1 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20 """ import os -import random import functools import rarfile from common import download @@ -265,7 +264,7 @@ def query_filter(querylists): return filter_query -def load_from_text(filepath, shuffle=True, fill_missing=-1): +def load_from_text(filepath, shuffle=False, fill_missing=-1): """ parse data file into querys """ @@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1): querylist._add_query(query) if querylist is not None: querylists.append(querylist) - if shuffle == True: - random.shuffle(querylists) return querylists -def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): +def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1): """ Parameters -------- filename : string - shuffle : shuffle query-doc pair under the same query fill_missing : fill the missing value. default in MQ2007 is -1 Returns