Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e915aa9c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e915aa9c
编写于
6月 26, 2017
作者:
Y
Your Name
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bugs
上级
97270b9f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
21 addition
and
20 deletion
+21
-20
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+1
-1
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+2
-2
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+5
-7
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+7
-5
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+1
-1
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+2
-1
python/paddle/v2/dataset/sentiment.py
python/paddle/v2/dataset/sentiment.py
+1
-1
python/paddle/v2/dataset/uci_housing.py
python/paddle/v2/dataset/uci_housing.py
+1
-1
python/paddle/v2/dataset/wmt14.py
python/paddle/v2/dataset/wmt14.py
+1
-1
未找到文件。
python/paddle/v2/dataset/cifar.py
浏览文件 @
e915aa9c
...
@@ -34,7 +34,7 @@ import numpy
...
@@ -34,7 +34,7 @@ import numpy
import
paddle.v2.dataset.common
import
paddle.v2.dataset.common
import
tarfile
import
tarfile
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
]
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
,
'convert'
]
URL_PREFIX
=
'https://www.cs.toronto.edu/~kriz/'
URL_PREFIX
=
'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL
=
URL_PREFIX
+
'cifar-10-python.tar.gz'
CIFAR10_URL
=
URL_PREFIX
+
'cifar-10-python.tar.gz'
...
...
python/paddle/v2/dataset/conll05.py
浏览文件 @
e915aa9c
...
@@ -25,7 +25,7 @@ import gzip
...
@@ -25,7 +25,7 @@ import gzip
import
itertools
import
itertools
import
paddle.v2.dataset.common
import
paddle.v2.dataset.common
__all__
=
[
'test, get_dict'
,
'get_embedding'
]
__all__
=
[
'test, get_dict'
,
'get_embedding'
,
'convert'
]
DATA_URL
=
'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_URL
=
'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5
=
'387719152ae52d60422c016e92a742fc'
DATA_MD5
=
'387719152ae52d60422c016e92a742fc'
...
@@ -229,7 +229,7 @@ def fetch():
...
@@ -229,7 +229,7 @@ def fetch():
paddle
.
v2
.
dataset
.
common
.
download
(
DATA_URL
,
'conll05st'
,
DATA_MD5
)
paddle
.
v2
.
dataset
.
common
.
download
(
DATA_URL
,
'conll05st'
,
DATA_MD5
)
def
convert
():
def
convert
(
path
):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
...
...
python/paddle/v2/dataset/imdb.py
浏览文件 @
e915aa9c
...
@@ -28,7 +28,7 @@ import re
...
@@ -28,7 +28,7 @@ import re
import
string
import
string
import
threading
import
threading
__all__
=
[
'build_dict'
,
'train'
,
'test'
]
__all__
=
[
'build_dict'
,
'train'
,
'test'
,
'convert'
]
URL
=
'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
URL
=
'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
MD5
=
'7c2ac02c03563afcf9b574c7e56c153a'
MD5
=
'7c2ac02c03563afcf9b574c7e56c153a'
...
@@ -168,12 +168,10 @@ def fetch():
...
@@ -168,12 +168,10 @@ def fetch():
paddle
.
v2
.
dataset
.
common
.
download
(
URL
,
'imdb'
,
MD5
)
paddle
.
v2
.
dataset
.
common
.
download
(
URL
,
'imdb'
,
MD5
)
def
convert
():
def
convert
(
path
):
"""
"""
Converts dataset to recordio format
Converts dataset to recordio format
"""
"""
word_dict
=
ds
.
imdb
.
word_dict
()
w
=
word_dict
()
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
word_dict
),
10
,
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
w
),
10
,
"imdb_train"
)
"imdb_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
w
),
10
,
"imdb_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
word_dict
),
10
,
"imdb_test"
)
python/paddle/v2/dataset/imikolov.py
浏览文件 @
e915aa9c
...
@@ -18,11 +18,11 @@ This module will download dataset from
...
@@ -18,11 +18,11 @@ This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators.
into paddle reader creators.
"""
"""
import
paddle.v2.dataset.common
as
common
import
paddle.v2.dataset.common
import
collections
import
collections
import
tarfile
import
tarfile
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
__all__
=
[
'train'
,
'test'
,
'build_dict'
,
'convert'
]
URL
=
'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
URL
=
'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5
=
'30177ea32e27c525793142b6bf2c8e2d'
MD5
=
'30177ea32e27c525793142b6bf2c8e2d'
...
@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
...
@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def
fetch
():
def
fetch
():
common
.
download
(
URL
,
"imikolov"
,
MD5
)
paddle
.
v2
.
dataset
.
common
.
download
(
URL
,
"imikolov"
,
MD5
)
def
convert
(
path
):
def
convert
(
path
):
...
@@ -154,5 +154,7 @@ def convert(path):
...
@@ -154,5 +154,7 @@ def convert(path):
"""
"""
N
=
5
N
=
5
word_dict
=
build_dict
()
word_dict
=
build_dict
()
common
.
convert
(
path
,
train
(
word_dict
,
N
),
10
,
"imikolov_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
common
.
convert
(
path
,
test
(
word_dict
,
N
),
10
,
"imikolov_test"
)
train
(
word_dict
,
N
),
10
,
"imikolov_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(
word_dict
,
N
),
10
,
"imikolov_test"
)
python/paddle/v2/dataset/mnist.py
浏览文件 @
e915aa9c
...
@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
...
@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
import
subprocess
import
subprocess
import
numpy
import
numpy
import
platform
import
platform
__all__
=
[
'train'
,
'test'
]
__all__
=
[
'train'
,
'test'
,
'convert'
]
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL
=
URL_PREFIX
+
't10k-images-idx3-ubyte.gz'
TEST_IMAGE_URL
=
URL_PREFIX
+
't10k-images-idx3-ubyte.gz'
...
...
python/paddle/v2/dataset/movielens.py
浏览文件 @
e915aa9c
...
@@ -30,7 +30,8 @@ import functools
...
@@ -30,7 +30,8 @@ import functools
__all__
=
[
__all__
=
[
'train'
,
'test'
,
'get_movie_title_dict'
,
'max_movie_id'
,
'max_user_id'
,
'train'
,
'test'
,
'get_movie_title_dict'
,
'max_movie_id'
,
'max_user_id'
,
'age_table'
,
'movie_categories'
,
'max_job_id'
,
'user_info'
,
'movie_info'
'age_table'
,
'movie_categories'
,
'max_job_id'
,
'user_info'
,
'movie_info'
,
'convert'
]
]
age_table
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
age_table
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
...
...
python/paddle/v2/dataset/sentiment.py
浏览文件 @
e915aa9c
...
@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews
...
@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews
import
paddle.v2.dataset.common
import
paddle.v2.dataset.common
__all__
=
[
'train'
,
'test'
,
'get_word_dict'
]
__all__
=
[
'train'
,
'test'
,
'get_word_dict'
,
'convert'
]
NUM_TRAINING_INSTANCES
=
1600
NUM_TRAINING_INSTANCES
=
1600
NUM_TOTAL_INSTANCES
=
2000
NUM_TOTAL_INSTANCES
=
2000
...
...
python/paddle/v2/dataset/uci_housing.py
浏览文件 @
e915aa9c
...
@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
...
@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
MD5
=
'd4accdce7a25600298819f8e28e8d593'
MD5
=
'd4accdce7a25600298819f8e28e8d593'
feature_names
=
[
feature_names
=
[
'CRIM'
,
'ZN'
,
'INDUS'
,
'CHAS'
,
'NOX'
,
'RM'
,
'AGE'
,
'DIS'
,
'RAD'
,
'TAX'
,
'CRIM'
,
'ZN'
,
'INDUS'
,
'CHAS'
,
'NOX'
,
'RM'
,
'AGE'
,
'DIS'
,
'RAD'
,
'TAX'
,
'PTRATIO'
,
'B'
,
'LSTAT'
'PTRATIO'
,
'B'
,
'LSTAT'
,
'convert'
]
]
UCI_TRAIN_DATA
=
None
UCI_TRAIN_DATA
=
None
...
...
python/paddle/v2/dataset/wmt14.py
浏览文件 @
e915aa9c
...
@@ -25,7 +25,7 @@ import gzip
...
@@ -25,7 +25,7 @@ import gzip
import
paddle.v2.dataset.common
import
paddle.v2.dataset.common
from
paddle.v2.parameters
import
Parameters
from
paddle.v2.parameters
import
Parameters
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
__all__
=
[
'train'
,
'test'
,
'build_dict'
,
'convert'
]
URL_DEV_TEST
=
'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
URL_DEV_TEST
=
'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST
=
'7d7897317ddd8ba0ae5c5fa7248d3ff5'
MD5_DEV_TEST
=
'7d7897317ddd8ba0ae5c5fa7248d3ff5'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录