机器未来 / Paddle — fork of PaddlePaddle / Paddle
Commit 807c7a47 (unverified)
Authored Aug 26, 2019 by Zeng Jinle; committed via GitHub on Aug 26, 2019
Parent: 11070cbf

remove recordio convert in dataset, test=develop (#19387)

11 changed files with 8 additions and 135 deletions (+8, -135)
python/paddle/dataset/cifar.py        +1  -11
python/paddle/dataset/common.py       +0  -14
python/paddle/dataset/conll05.py      +1  -9
python/paddle/dataset/imdb.py         +1  -10
python/paddle/dataset/imikolov.py     +1  -13
python/paddle/dataset/mnist.py        +1  -9
python/paddle/dataset/movielens.py    +1  -10
python/paddle/dataset/sentiment.py    +1  -9
python/paddle/dataset/uci_housing.py  +1  -9
python/paddle/dataset/wmt14.py        +0  -10
python/paddle/dataset/wmt16.py        +0  -31
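Every deleted helper follows the same pattern: a module-level convert(path) that feeds the dataset's readers into paddle.dataset.common.convert, writing recordio shards of 1000 records each. Below is a minimal sketch of that pattern, reconstructed from the cifar.py lines deleted in this commit; the output directory is hypothetical, and after this commit the per-dataset convert entry points shown in the diffs no longer exist.

    # Sketch of the removed per-dataset recordio helper (pre-#19387 behaviour).
    # The convert(path, reader, line_count, name_prefix) call signature is taken
    # from the deleted lines; "/tmp/cifar_recordio" is a hypothetical directory.
    import paddle.dataset.cifar as cifar
    import paddle.dataset.common


    def convert_cifar10(path):
        """Write the CIFAR-10 readers out as recordio shards of 1000 records each."""
        paddle.dataset.common.convert(path, cifar.train10(), 1000, "cifar_train10")
        paddle.dataset.common.convert(path, cifar.test10(), 1000, "cifar_test10")


    if __name__ == "__main__":
        convert_cifar10("/tmp/cifar_recordio")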
python/paddle/dataset/cifar.py

@@ -37,7 +37,7 @@ import tarfile
 import six
 from six.moves import cPickle as pickle
 
-__all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
+__all__ = ['train100', 'test100', 'train10', 'test10']
 
 URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
 CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
@@ -144,13 +144,3 @@ def test10(cycle=False):
 def fetch():
     paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
     paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, train100(), 1000, "cifar_train100")
-    paddle.dataset.common.convert(path, test100(), 1000, "cifar_test100")
-    paddle.dataset.common.convert(path, train10(), 1000, "cifar_train10")
-    paddle.dataset.common.convert(path, test10(), 1000, "cifar_test10")
python/paddle/dataset/common.py

@@ -119,20 +119,6 @@ def fetch_all():
                 "fetch")()
 
 
-def fetch_all_recordio(path):
-    for module_name in [
-            x for x in dir(paddle.dataset) if not x.startswith("__")
-    ]:
-        if "convert" in dir(
-                importlib.import_module("paddle.dataset.%s" % module_name)) and \
-                not module_name == "common":
-            ds_path = os.path.join(path, module_name)
-            must_mkdirs(ds_path)
-            getattr(
-                importlib.import_module("paddle.dataset.%s" % module_name),
-                "convert")(ds_path)
-
-
 def split(reader, line_count, suffix="%05d.pickle", dumper=pickle.dump):
     """
     you can call the function as:
python/paddle/dataset/conll05.py

@@ -29,7 +29,7 @@ import paddle.dataset.common
 import paddle.compat as cpt
 from six.moves import zip, range
 
-__all__ = ['test, get_dict', 'get_embedding', 'convert']
+__all__ = ['test, get_dict', 'get_embedding']
 
 DATA_URL = 'http://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz'
 DATA_MD5 = '387719152ae52d60422c016e92a742fc'
@@ -248,11 +248,3 @@ def fetch():
     paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
     paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
     paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, test(), 1000, "conl105_train")
-    paddle.dataset.common.convert(path, test(), 1000, "conl105_test")
python/paddle/dataset/imdb.py

@@ -29,7 +29,7 @@ import re
 import string
 import six
 
-__all__ = ['build_dict', 'train', 'test', 'convert']
+__all__ = ['build_dict', 'train', 'test']
 
 URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
@@ -140,12 +140,3 @@ def word_dict():
 def fetch():
     paddle.dataset.common.download(URL, 'imdb', MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    w = word_dict()
-    paddle.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train")
-    paddle.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test")
python/paddle/dataset/imikolov.py

@@ -26,7 +26,7 @@ import collections
 import tarfile
 import six
 
-__all__ = ['train', 'test', 'build_dict', 'convert']
+__all__ = ['train', 'test', 'build_dict']
 
 URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
 MD5 = '30177ea32e27c525793142b6bf2c8e2d'
@@ -152,15 +152,3 @@ def test(word_idx, n, data_type=DataType.NGRAM):
 def fetch():
     paddle.dataset.common.download(URL, "imikolov", MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    N = 5
-    word_dict = build_dict()
-    paddle.dataset.common.convert(path, train(word_dict, N), 1000,
-                                  "imikolov_train")
-    paddle.dataset.common.convert(path, test(word_dict, N), 1000,
-                                  "imikolov_test")
python/paddle/dataset/mnist.py

@@ -25,7 +25,7 @@ import gzip
 import numpy
 import struct
 from six.moves import range
 
-__all__ = ['train', 'test', 'convert']
+__all__ = ['train', 'test']
 
 URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
 TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
@@ -126,11 +126,3 @@ def fetch():
     paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
     paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
     paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, train(), 1000, "minist_train")
-    paddle.dataset.common.convert(path, test(), 1000, "minist_test")
python/paddle/dataset/movielens.py

@@ -35,8 +35,7 @@ import paddle.compat as cpt
 
 __all__ = [
     'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
-    'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info',
-    'convert'
+    'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
 ]
 
 age_table = [1, 18, 25, 35, 45, 50, 56]
@@ -259,13 +258,5 @@ def fetch():
     paddle.dataset.common.download(URL, "movielens", MD5)
 
 
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, train(), 1000, "movielens_train")
-    paddle.dataset.common.convert(path, test(), 1000, "movielens_test")
-
-
 if __name__ == '__main__':
     unittest()
python/paddle/dataset/sentiment.py

@@ -31,7 +31,7 @@ from nltk.corpus import movie_reviews
 
 import paddle.dataset.common
 
-__all__ = ['train', 'test', 'get_word_dict', 'convert']
+__all__ = ['train', 'test', 'get_word_dict']
 
 NUM_TRAINING_INSTANCES = 1600
 NUM_TOTAL_INSTANCES = 2000
@@ -134,11 +134,3 @@ def test():
 def fetch():
     nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, train, 1000, "sentiment_train")
-    paddle.dataset.common.convert(path, test, 1000, "sentiment_test")
python/paddle/dataset/uci_housing.py

@@ -34,7 +34,7 @@ URL = 'http://paddlemodels.bj.bcebos.com/uci_housing/housing.data'
 MD5 = 'd4accdce7a25600298819f8e28e8d593'
 
 feature_names = [
     'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
-    'PTRATIO', 'B', 'LSTAT', 'convert'
+    'PTRATIO', 'B', 'LSTAT'
 ]
 
 UCI_TRAIN_DATA = None
@@ -147,11 +147,3 @@ def predict_reader():
 def fetch():
     paddle.dataset.common.download(URL, 'uci_housing', MD5)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    paddle.dataset.common.convert(path, train(), 1000, "uci_housing_train")
-    paddle.dataset.common.convert(path, test(), 1000, "uci_houseing_test")
python/paddle/dataset/wmt14.py

@@ -33,7 +33,6 @@ __all__ = [
     'train',
     'test',
     'get_dict',
-    'convert',
 ]
 
 URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
@@ -167,12 +166,3 @@ def get_dict(dict_size, reverse=True):
 def fetch():
     paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
     paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
-
-
-def convert(path):
-    """
-    Converts dataset to recordio format
-    """
-    dict_size = 30000
-    paddle.dataset.common.convert(path, train(dict_size), 1000, "wmt14_train")
-    paddle.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test")
python/paddle/dataset/wmt16.py

@@ -43,7 +43,6 @@ __all__ = [
     "train",
     "test",
     "validation",
-    "convert",
     "fetch",
     "get_dict",
 ]
@@ -325,33 +324,3 @@ def fetch():
     """
     paddle.v4.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                       "wmt16.tar.gz")
-
-
-def convert(path, src_dict_size, trg_dict_size, src_lang):
-    """Converts dataset to recordio format.
-    """
-
-    paddle.dataset.common.convert(
-        path,
-        train(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_train")
-    paddle.dataset.common.convert(
-        path,
-        test(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_test")
-    paddle.dataset.common.convert(
-        path,
-        validation(
-            src_dict_size=src_dict_size,
-            trg_dict_size=trg_dict_size,
-            src_lang=src_lang),
-        1000,
-        "wmt16_validation")