Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
e8493620
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e8493620
编写于
7月 26, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove the overfix of print function in dataset/ folder
上级
4bf3c8c5
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
10 addition
and
10 deletion
+10
-10
python/paddle/dataset/common.py
python/paddle/dataset/common.py
+3
-3
python/paddle/dataset/movielens.py
python/paddle/dataset/movielens.py
+2
-2
python/paddle/dataset/mq2007.py
python/paddle/dataset/mq2007.py
+4
-4
python/paddle/dataset/sentiment.py
python/paddle/dataset/sentiment.py
+1
-1
未找到文件。
python/paddle/dataset/common.py
浏览文件 @
e8493620
...
@@ -74,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None):
...
@@ -74,13 +74,13 @@ def download(url, module_name, md5sum, save_name=None):
retry_limit
=
3
retry_limit
=
3
while
not
(
os
.
path
.
exists
(
filename
)
and
md5file
(
filename
)
==
md5sum
):
while
not
(
os
.
path
.
exists
(
filename
)
and
md5file
(
filename
)
==
md5sum
):
if
os
.
path
.
exists
(
filename
):
if
os
.
path
.
exists
(
filename
):
print
(
(
"file md5"
,
md5file
(
filename
),
md5sum
)
)
print
(
"file md5"
,
md5file
(
filename
),
md5sum
)
if
retry
<
retry_limit
:
if
retry
<
retry_limit
:
retry
+=
1
retry
+=
1
else
:
else
:
raise
RuntimeError
(
"Cannot download {0} within retry limit {1}"
.
raise
RuntimeError
(
"Cannot download {0} within retry limit {1}"
.
format
(
url
,
retry_limit
))
format
(
url
,
retry_limit
))
print
(
(
"Cache file %s not found, downloading %s"
%
(
filename
,
url
)
))
print
(
"Cache file %s not found, downloading %s"
%
(
filename
,
url
))
r
=
requests
.
get
(
url
,
stream
=
True
)
r
=
requests
.
get
(
url
,
stream
=
True
)
total_length
=
r
.
headers
.
get
(
'content-length'
)
total_length
=
r
.
headers
.
get
(
'content-length'
)
...
@@ -189,7 +189,7 @@ def cluster_files_reader(files_pattern,
...
@@ -189,7 +189,7 @@ def cluster_files_reader(files_pattern,
my_file_list
=
[]
my_file_list
=
[]
for
idx
,
fn
in
enumerate
(
file_list
):
for
idx
,
fn
in
enumerate
(
file_list
):
if
idx
%
trainer_count
==
trainer_id
:
if
idx
%
trainer_count
==
trainer_id
:
print
(
(
"append file: %s"
%
fn
)
)
print
(
"append file: %s"
%
fn
)
my_file_list
.
append
(
fn
)
my_file_list
.
append
(
fn
)
for
fn
in
my_file_list
:
for
fn
in
my_file_list
:
with
open
(
fn
,
"r"
)
as
f
:
with
open
(
fn
,
"r"
)
as
f
:
...
...
python/paddle/dataset/movielens.py
浏览文件 @
e8493620
...
@@ -16,7 +16,7 @@ Movielens 1-M dataset.
...
@@ -16,7 +16,7 @@ Movielens 1-M dataset.
Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
movies, which was collected by GroupLens Research. This module will download
movies, which was collected by GroupLens Research. This module will download
Movielens 1-M dataset from
Movielens 1-M dataset from
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
set and test set into paddle reader creators.
set and test set into paddle reader creators.
...
@@ -243,7 +243,7 @@ def unittest():
...
@@ -243,7 +243,7 @@ def unittest():
for
test_count
,
_
in
enumerate
(
test
()()):
for
test_count
,
_
in
enumerate
(
test
()()):
pass
pass
print
(
(
train_count
,
test_count
)
)
print
(
train_count
,
test_count
)
def
fetch
():
def
fetch
():
...
...
python/paddle/dataset/mq2007.py
浏览文件 @
e8493620
...
@@ -53,7 +53,7 @@ class Query(object):
...
@@ -53,7 +53,7 @@ class Query(object):
----------
----------
query_id : int
query_id : int
query_id in dataset, mapping from query to relevance documents
query_id in dataset, mapping from query to relevance documents
relevance_score : int
relevance_score : int
relevance score of query and document pair
relevance score of query and document pair
feature_vector : array, dense feature
feature_vector : array, dense feature
feature in vector format
feature in vector format
...
@@ -92,7 +92,7 @@ class Query(object):
...
@@ -92,7 +92,7 @@ class Query(object):
sys
.
stdout
.
write
(
"expect 48 space split parts, get %d"
%
sys
.
stdout
.
write
(
"expect 48 space split parts, get %d"
%
(
len
(
parts
)))
(
len
(
parts
)))
return
None
return
None
# format : 0 qid:10 1:0.000272 2:0.000000 ....
# format : 0 qid:10 1:0.000272 2:0.000000 ....
self
.
relevance_score
=
int
(
parts
[
0
])
self
.
relevance_score
=
int
(
parts
[
0
])
self
.
query_id
=
int
(
parts
[
1
].
split
(
':'
)[
1
])
self
.
query_id
=
int
(
parts
[
1
].
split
(
':'
)[
1
])
for
p
in
parts
[
2
:]:
for
p
in
parts
[
2
:]:
...
@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
...
@@ -295,7 +295,7 @@ def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
--------
--------
filename : string
filename : string
fill_missing : fill the missing value. default in MQ2007 is -1
fill_missing : fill the missing value. default in MQ2007 is -1
Returns
Returns
------
------
yield
yield
...
@@ -330,4 +330,4 @@ if __name__ == "__main__":
...
@@ -330,4 +330,4 @@ if __name__ == "__main__":
mytest
=
functools
.
partial
(
mytest
=
functools
.
partial
(
__reader__
,
filepath
=
"MQ2007/MQ2007/Fold1/sample"
,
format
=
"listwise"
)
__reader__
,
filepath
=
"MQ2007/MQ2007/Fold1/sample"
,
format
=
"listwise"
)
for
label
,
query
in
mytest
():
for
label
,
query
in
mytest
():
print
(
(
label
,
query
)
)
print
(
label
,
query
)
python/paddle/dataset/sentiment.py
浏览文件 @
e8493620
...
@@ -47,7 +47,7 @@ def download_data_if_not_yet():
...
@@ -47,7 +47,7 @@ def download_data_if_not_yet():
nltk
.
download
(
nltk
.
download
(
'movie_reviews'
,
download_dir
=
paddle
.
dataset
.
common
.
DATA_HOME
)
'movie_reviews'
,
download_dir
=
paddle
.
dataset
.
common
.
DATA_HOME
)
print
(
"Download data set success....."
)
print
(
"Download data set success....."
)
print
(
(
"Path is "
+
nltk
.
data
.
find
(
'corpora/movie_reviews'
).
path
)
)
print
(
"Path is "
+
nltk
.
data
.
find
(
'corpora/movie_reviews'
).
path
)
def
get_word_dict
():
def
get_word_dict
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录