未验证 提交 0237b7e9 编写于 作者: D dzhwinter 提交者: GitHub

"remove random shuffle" (#7521)

上级 3bbff25b
......@@ -23,7 +23,6 @@ import paddle.v2.dataset
import cPickle
import glob
import cPickle as pickle
-import random
__all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
......@@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix):
indx_f = 0
def write_data(indx_f, lines):
-random.shuffle(lines)
filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f)
writer = recordio.writer(filename)
for l in lines:
......
......@@ -25,7 +25,6 @@ import collections
import tarfile
import re
import string
-import random
__all__ = ['build_dict', 'train', 'test', 'convert']
......@@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
load(pos_pattern, INS, 0)
load(neg_pattern, INS, 1)
-random.shuffle(INS)
def reader():
for doc, label in INS:
......
......@@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
"""
import os
-import random
import functools
import rarfile
from common import download
......@@ -265,7 +264,7 @@ def query_filter(querylists):
return filter_query
-def load_from_text(filepath, shuffle=True, fill_missing=-1):
+def load_from_text(filepath, shuffle=False, fill_missing=-1):
"""
parse data file into querys
"""
......@@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1):
querylist._add_query(query)
if querylist is not None:
querylists.append(querylist)
-if shuffle == True:
-random.shuffle(querylists)
return querylists
-def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1):
+def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
"""
Parameters
--------
filename : string
shuffle : shuffle query-doc pair under the same query
fill_missing : fill the missing value. default in MQ2007 is -1
Returns
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册