Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
cbabaa45
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cbabaa45
编写于
8月 04, 2017
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
convert dataset into recordio format
上级
6512893b
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
46 addition
and
26 deletion
+46
-26
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+4
-4
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+24
-6
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+2
-2
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+2
-2
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+3
-2
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+2
-2
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+2
-2
python/paddle/v2/dataset/sentiment.py
python/paddle/v2/dataset/sentiment.py
+2
-2
python/paddle/v2/dataset/uci_housing.py
python/paddle/v2/dataset/uci_housing.py
+2
-2
python/paddle/v2/dataset/wmt14.py
python/paddle/v2/dataset/wmt14.py
+3
-2
未找到文件。
python/paddle/v2/dataset/cifar.py
浏览文件 @
cbabaa45
...
@@ -133,7 +133,7 @@ def convert(path):
...
@@ -133,7 +133,7 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train100
(),
10
,
"cifar_train100"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train100
(),
10
00
,
"cifar_train100"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test100
(),
10
,
"cifar_test100"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test100
(),
10
00
,
"cifar_test100"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train10
(),
10
,
"cifar_train10"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train10
(),
10
00
,
"cifar_train10"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test10
(),
10
,
"cifar_test10"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test10
(),
10
00
,
"cifar_test10"
)
python/paddle/v2/dataset/common.py
浏览文件 @
cbabaa45
...
@@ -32,19 +32,24 @@ __all__ = [
...
@@ -32,19 +32,24 @@ __all__ = [
DATA_HOME
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset'
)
DATA_HOME
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset'
)
# When running unit tests, there could be multiple processes that
# When running unit tests, there could be multiple processes that
# trying to create DATA_HOME directory simultaneously, so we cannot
# trying to create DATA_HOME directory simultaneously, so we cannot
# use a if condition to check for the existence of the directory;
# use a if condition to check for the existence of the directory;
# instead, we use the filesystem as the synchronization mechanism by
# instead, we use the filesystem as the synchronization mechanism by
# catching returned errors.
# catching returned errors.
try
:
def
must_mkdirs
(
path
):
try
:
os
.
makedirs
(
DATA_HOME
)
os
.
makedirs
(
DATA_HOME
)
except
OSError
as
exc
:
except
OSError
as
exc
:
if
exc
.
errno
!=
errno
.
EEXIST
:
if
exc
.
errno
!=
errno
.
EEXIST
:
raise
raise
pass
pass
must_mkdirs
(
DATA_HOME
)
def
md5file
(
fname
):
def
md5file
(
fname
):
hash_md5
=
hashlib
.
md5
()
hash_md5
=
hashlib
.
md5
()
f
=
open
(
fname
,
"rb"
)
f
=
open
(
fname
,
"rb"
)
...
@@ -93,6 +98,19 @@ def fetch_all():
...
@@ -93,6 +98,19 @@ def fetch_all():
"fetch"
)()
"fetch"
)()
def
fetch_all_recordio
(
path
):
for
module_name
in
filter
(
lambda
x
:
not
x
.
startswith
(
"__"
),
dir
(
paddle
.
v2
.
dataset
)):
if
"convert"
in
dir
(
importlib
.
import_module
(
"paddle.v2.dataset.%s"
%
module_name
))
and
\
not
module_name
==
"common"
:
ds_path
=
os
.
path
.
join
(
path
,
module_name
)
must_mkdirs
(
ds_path
)
getattr
(
importlib
.
import_module
(
"paddle.v2.dataset.%s"
%
module_name
),
"convert"
)(
ds_path
)
def
split
(
reader
,
line_count
,
suffix
=
"%05d.pickle"
,
dumper
=
cPickle
.
dump
):
def
split
(
reader
,
line_count
,
suffix
=
"%05d.pickle"
,
dumper
=
cPickle
.
dump
):
"""
"""
you can call the function as:
you can call the function as:
...
...
python/paddle/v2/dataset/conll05.py
浏览文件 @
cbabaa45
...
@@ -233,5 +233,5 @@ def convert(path):
...
@@ -233,5 +233,5 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
,
"conl105_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
00
,
"conl105_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
,
"conl105_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
00
,
"conl105_test"
)
python/paddle/v2/dataset/imdb.py
浏览文件 @
cbabaa45
...
@@ -173,5 +173,5 @@ def convert(path):
...
@@ -173,5 +173,5 @@ def convert(path):
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
w
=
word_dict
()
w
=
word_dict
()
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
w
),
10
,
"imdb_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
w
),
10
00
,
"imdb_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
w
),
10
,
"imdb_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
w
),
10
00
,
"imdb_test"
)
python/paddle/v2/dataset/imikolov.py
浏览文件 @
cbabaa45
...
@@ -155,6 +155,7 @@ def convert(path):
...
@@ -155,6 +155,7 @@ def convert(path):
N
=
5
N
=
5
word_dict
=
build_dict
()
word_dict
=
build_dict
()
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(
word_dict
,
N
),
10
,
"imikolov_train"
)
train
(
word_dict
,
N
),
1000
,
"imikolov_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(
word_dict
,
N
),
10
,
"imikolov_test"
)
test
(
word_dict
,
N
),
10
00
,
"imikolov_test"
)
python/paddle/v2/dataset/mnist.py
浏览文件 @
cbabaa45
...
@@ -119,5 +119,5 @@ def convert(path):
...
@@ -119,5 +119,5 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
,
"minist_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
00
,
"minist_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
,
"minist_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
00
,
"minist_test"
)
python/paddle/v2/dataset/movielens.py
浏览文件 @
cbabaa45
...
@@ -254,8 +254,8 @@ def convert(path):
...
@@ -254,8 +254,8 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
,
"movielens_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
00
,
"movielens_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
,
"movielens_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
00
,
"movielens_test"
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/dataset/sentiment.py
浏览文件 @
cbabaa45
...
@@ -137,5 +137,5 @@ def convert(path):
...
@@ -137,5 +137,5 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
,
10
,
"sentiment_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
,
10
00
,
"sentiment_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
,
10
,
"sentiment_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
,
10
00
,
"sentiment_test"
)
python/paddle/v2/dataset/uci_housing.py
浏览文件 @
cbabaa45
...
@@ -119,5 +119,5 @@ def convert(path):
...
@@ -119,5 +119,5 @@ def convert(path):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
,
"uci_housing_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(),
10
00
,
"uci_housing_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
,
"uci_houseing_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(),
10
00
,
"uci_houseing_test"
)
python/paddle/v2/dataset/wmt14.py
浏览文件 @
cbabaa45
...
@@ -169,5 +169,6 @@ def convert(path):
...
@@ -169,5 +169,6 @@ def convert(path):
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
dict_size
=
30000
dict_size
=
30000
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(
dict_size
),
10
,
"wmt14_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(
dict_size
),
10
,
"wmt14_test"
)
train
(
dict_size
),
1000
,
"wmt14_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(
dict_size
),
1000
,
"wmt14_test"
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录