未验证 提交 0237b7e9 作者: dzhwinter 提交者: GitHub

"remove random shuffle" (#7521)

上级 3bbff25b
...@@ -23,7 +23,6 @@ import paddle.v2.dataset ...@@ -23,7 +23,6 @@ import paddle.v2.dataset
import cPickle import cPickle
import glob import glob
import cPickle as pickle import cPickle as pickle
import random
__all__ = [ __all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
...@@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix): ...@@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix):
indx_f = 0 indx_f = 0
def write_data(indx_f, lines): def write_data(indx_f, lines):
random.shuffle(lines)
filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f) filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f)
writer = recordio.writer(filename) writer = recordio.writer(filename)
for l in lines: for l in lines:
......
...@@ -25,7 +25,6 @@ import collections ...@@ -25,7 +25,6 @@ import collections
import tarfile import tarfile
import re import re
import string import string
import random
__all__ = ['build_dict', 'train', 'test', 'convert'] __all__ = ['build_dict', 'train', 'test', 'convert']
...@@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx): ...@@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
load(pos_pattern, INS, 0) load(pos_pattern, INS, 0)
load(neg_pattern, INS, 1) load(neg_pattern, INS, 1)
random.shuffle(INS)
def reader(): def reader():
for doc, label in INS: for doc, label in INS:
......
...@@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20 ...@@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
""" """
import os import os
import random
import functools import functools
import rarfile import rarfile
from common import download from common import download
...@@ -265,7 +264,7 @@ def query_filter(querylists): ...@@ -265,7 +264,7 @@ def query_filter(querylists):
return filter_query return filter_query
def load_from_text(filepath, shuffle=True, fill_missing=-1): def load_from_text(filepath, shuffle=False, fill_missing=-1):
""" """
parse data file into querys parse data file into querys
""" """
...@@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1): ...@@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1):
querylist._add_query(query) querylist._add_query(query)
if querylist is not None: if querylist is not None:
querylists.append(querylist) querylists.append(querylist)
if shuffle == True:
random.shuffle(querylists)
return querylists return querylists
def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
""" """
Parameters Parameters
-------- --------
filename : string filename : string
shuffle : shuffle query-doc pair under the same query
fill_missing : fill the missing value. default in MQ2007 is -1 fill_missing : fill the missing value. default in MQ2007 is -1
Returns Returns
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册