提交 e8493620 编写于 作者: M minqiyang

Remove the over-fix of the print function in the dataset/ folder (drop the redundant extra parentheses, i.e. print((...)) -> print(...))

上级 4bf3c8c5
...@@ -74,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None): ...@@ -74,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None):
retry_limit = 3 retry_limit = 3
while not (os.path.exists(filename) and md5file(filename) == md5sum): while not (os.path.exists(filename) and md5file(filename) == md5sum):
if os.path.exists(filename): if os.path.exists(filename):
print(("file md5", md5file(filename), md5sum)) print("file md5", md5file(filename), md5sum)
if retry < retry_limit: if retry < retry_limit:
retry += 1 retry += 1
else: else:
raise RuntimeError("Cannot download {0} within retry limit {1}". raise RuntimeError("Cannot download {0} within retry limit {1}".
format(url, retry_limit)) format(url, retry_limit))
print(("Cache file %s not found, downloading %s" % (filename, url))) print("Cache file %s not found, downloading %s" % (filename, url))
r = requests.get(url, stream=True) r = requests.get(url, stream=True)
total_length = r.headers.get('content-length') total_length = r.headers.get('content-length')
...@@ -189,7 +189,7 @@ def cluster_files_reader(files_pattern, ...@@ -189,7 +189,7 @@ def cluster_files_reader(files_pattern,
my_file_list = [] my_file_list = []
for idx, fn in enumerate(file_list): for idx, fn in enumerate(file_list):
if idx % trainer_count == trainer_id: if idx % trainer_count == trainer_id:
print(("append file: %s" % fn)) print("append file: %s" % fn)
my_file_list.append(fn) my_file_list.append(fn)
for fn in my_file_list: for fn in my_file_list:
with open(fn, "r") as f: with open(fn, "r") as f:
......
...@@ -16,7 +16,7 @@ Movielens 1-M dataset. ...@@ -16,7 +16,7 @@ Movielens 1-M dataset.
Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000 Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
movies, which was collected by GroupLens Research. This module will download movies, which was collected by GroupLens Research. This module will download
Movielens 1-M dataset from Movielens 1-M dataset from
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
set and test set into paddle reader creators. set and test set into paddle reader creators.
...@@ -243,7 +243,7 @@ def unittest(): ...@@ -243,7 +243,7 @@ def unittest():
for test_count, _ in enumerate(test()()): for test_count, _ in enumerate(test()()):
pass pass
print((train_count, test_count)) print(train_count, test_count)
def fetch(): def fetch():
......
...@@ -53,7 +53,7 @@ class Query(object): ...@@ -53,7 +53,7 @@ class Query(object):
---------- ----------
query_id : int query_id : int
query_id in dataset, mapping from query to relevance documents query_id in dataset, mapping from query to relevance documents
relevance_score : int relevance_score : int
relevance score of query and document pair relevance score of query and document pair
feature_vector : array, dense feature feature_vector : array, dense feature
feature in vector format feature in vector format
...@@ -92,7 +92,7 @@ class Query(object): ...@@ -92,7 +92,7 @@ class Query(object):
sys.stdout.write("expect 48 space split parts, get %d" % sys.stdout.write("expect 48 space split parts, get %d" %
(len(parts))) (len(parts)))
return None return None
# format : 0 qid:10 1:0.000272 2:0.000000 .... # format : 0 qid:10 1:0.000272 2:0.000000 ....
self.relevance_score = int(parts[0]) self.relevance_score = int(parts[0])
self.query_id = int(parts[1].split(':')[1]) self.query_id = int(parts[1].split(':')[1])
for p in parts[2:]: for p in parts[2:]:
...@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1): ...@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
-------- --------
filename : string filename : string
fill_missing : fill the missing value. default in MQ2007 is -1 fill_missing : fill the missing value. default in MQ2007 is -1
Returns Returns
------ ------
yield yield
...@@ -330,4 +330,4 @@ if __name__ == "__main__": ...@@ -330,4 +330,4 @@ if __name__ == "__main__":
mytest = functools.partial( mytest = functools.partial(
__reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise") __reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise")
for label, query in mytest(): for label, query in mytest():
print((label, query)) print(label, query)
...@@ -47,7 +47,7 @@ def download_data_if_not_yet(): ...@@ -47,7 +47,7 @@ def download_data_if_not_yet():
nltk.download( nltk.download(
'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
print("Download data set success.....") print("Download data set success.....")
print(("Path is " + nltk.data.find('corpora/movie_reviews').path)) print("Path is " + nltk.data.find('corpora/movie_reviews').path)
def get_word_dict(): def get_word_dict():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册