提交 e8493620 编写于 作者: M minqiyang

Remove the over-applied extra parentheses around print() arguments in the dataset/ folder

上级 4bf3c8c5
......@@ -74,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None):
retry_limit = 3
while not (os.path.exists(filename) and md5file(filename) == md5sum):
if os.path.exists(filename):
print(("file md5", md5file(filename), md5sum))
print("file md5", md5file(filename), md5sum)
if retry < retry_limit:
retry += 1
else:
raise RuntimeError("Cannot download {0} within retry limit {1}".
format(url, retry_limit))
print(("Cache file %s not found, downloading %s" % (filename, url)))
print("Cache file %s not found, downloading %s" % (filename, url))
r = requests.get(url, stream=True)
total_length = r.headers.get('content-length')
......@@ -189,7 +189,7 @@ def cluster_files_reader(files_pattern,
my_file_list = []
for idx, fn in enumerate(file_list):
if idx % trainer_count == trainer_id:
print(("append file: %s" % fn))
print("append file: %s" % fn)
my_file_list.append(fn)
for fn in my_file_list:
with open(fn, "r") as f:
......
......@@ -16,7 +16,7 @@ Movielens 1-M dataset.
Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
movies, which was collected by GroupLens Research. This module will download
Movielens 1-M dataset from
Movielens 1-M dataset from
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
set and test set into paddle reader creators.
......@@ -243,7 +243,7 @@ def unittest():
for test_count, _ in enumerate(test()()):
pass
print((train_count, test_count))
print(train_count, test_count)
def fetch():
......
......@@ -53,7 +53,7 @@ class Query(object):
----------
query_id : int
query_id in dataset, mapping from query to relevance documents
relevance_score : int
relevance_score : int
relevance score of query and document pair
feature_vector : array, dense feature
feature in vector format
......@@ -92,7 +92,7 @@ class Query(object):
sys.stdout.write("expect 48 space split parts, get %d" %
(len(parts)))
return None
# format : 0 qid:10 1:0.000272 2:0.000000 ....
# format : 0 qid:10 1:0.000272 2:0.000000 ....
self.relevance_score = int(parts[0])
self.query_id = int(parts[1].split(':')[1])
for p in parts[2:]:
......@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
--------
filename : string
fill_missing : fill the missing value. default in MQ2007 is -1
Returns
------
yield
......@@ -330,4 +330,4 @@ if __name__ == "__main__":
mytest = functools.partial(
__reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise")
for label, query in mytest():
print((label, query))
print(label, query)
......@@ -47,7 +47,7 @@ def download_data_if_not_yet():
nltk.download(
'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
print("Download data set success.....")
print(("Path is " + nltk.data.find('corpora/movie_reviews').path))
print("Path is " + nltk.data.find('corpora/movie_reviews').path)
def get_word_dict():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册