Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e915aa9c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e915aa9c
编写于
6月 26, 2017
作者:
Y
Your Name
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bugs
上级
97270b9f
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
21 addition
and
20 deletion
+21
-20
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+1
-1
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+2
-2
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+5
-7
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+7
-5
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+1
-1
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+2
-1
python/paddle/v2/dataset/sentiment.py
python/paddle/v2/dataset/sentiment.py
+1
-1
python/paddle/v2/dataset/uci_housing.py
python/paddle/v2/dataset/uci_housing.py
+1
-1
python/paddle/v2/dataset/wmt14.py
python/paddle/v2/dataset/wmt14.py
+1
-1
未找到文件。
python/paddle/v2/dataset/cifar.py
浏览文件 @
e915aa9c
...
...
@@ -34,7 +34,7 @@ import numpy
import
paddle.v2.dataset.common
import
tarfile
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
]
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
,
'convert'
]
URL_PREFIX
=
'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL
=
URL_PREFIX
+
'cifar-10-python.tar.gz'
...
...
python/paddle/v2/dataset/conll05.py
浏览文件 @
e915aa9c
...
...
@@ -25,7 +25,7 @@ import gzip
import
itertools
import
paddle.v2.dataset.common
__all__
=
[
'test, get_dict'
,
'get_embedding'
]
__all__
=
[
'test, get_dict'
,
'get_embedding'
,
'convert'
]
DATA_URL
=
'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5
=
'387719152ae52d60422c016e92a742fc'
...
...
@@ -229,7 +229,7 @@ def fetch():
paddle
.
v2
.
dataset
.
common
.
download
(
DATA_URL
,
'conll05st'
,
DATA_MD5
)
def
convert
():
def
convert
(
path
):
"""
Converts dataset to recordio format
"""
...
...
python/paddle/v2/dataset/imdb.py
浏览文件 @
e915aa9c
...
...
@@ -28,7 +28,7 @@ import re
import
string
import
threading
__all__
=
[
'build_dict'
,
'train'
,
'test'
]
__all__
=
[
'build_dict'
,
'train'
,
'test'
,
'convert'
]
URL
=
'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
MD5
=
'7c2ac02c03563afcf9b574c7e56c153a'
...
...
@@ -168,12 +168,10 @@ def fetch():
paddle
.
v2
.
dataset
.
common
.
download
(
URL
,
'imdb'
,
MD5
)
def
convert
():
def
convert
(
path
):
"""
Converts dataset to recordio format
"""
word_dict
=
ds
.
imdb
.
word_dict
()
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
word_dict
),
10
,
"imdb_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
word_dict
),
10
,
"imdb_test"
)
w
=
word_dict
()
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
train
(
w
),
10
,
"imdb_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
lambda
:
test
(
w
),
10
,
"imdb_test"
)
python/paddle/v2/dataset/imikolov.py
浏览文件 @
e915aa9c
...
...
@@ -18,11 +18,11 @@ This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators.
"""
import
paddle.v2.dataset.common
as
common
import
paddle.v2.dataset.common
import
collections
import
tarfile
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
__all__
=
[
'train'
,
'test'
,
'build_dict'
,
'convert'
]
URL
=
'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5
=
'30177ea32e27c525793142b6bf2c8e2d'
...
...
@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def
fetch
():
common
.
download
(
URL
,
"imikolov"
,
MD5
)
paddle
.
v2
.
dataset
.
common
.
download
(
URL
,
"imikolov"
,
MD5
)
def
convert
(
path
):
...
...
@@ -154,5 +154,7 @@ def convert(path):
"""
N
=
5
word_dict
=
build_dict
()
common
.
convert
(
path
,
train
(
word_dict
,
N
),
10
,
"imikolov_train"
)
common
.
convert
(
path
,
test
(
word_dict
,
N
),
10
,
"imikolov_test"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
train
(
word_dict
,
N
),
10
,
"imikolov_train"
)
paddle
.
v2
.
dataset
.
common
.
convert
(
path
,
test
(
word_dict
,
N
),
10
,
"imikolov_test"
)
python/paddle/v2/dataset/mnist.py
浏览文件 @
e915aa9c
...
...
@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
import
subprocess
import
numpy
import
platform
__all__
=
[
'train'
,
'test'
]
__all__
=
[
'train'
,
'test'
,
'convert'
]
URL_PREFIX
=
'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL
=
URL_PREFIX
+
't10k-images-idx3-ubyte.gz'
...
...
python/paddle/v2/dataset/movielens.py
浏览文件 @
e915aa9c
...
...
@@ -30,7 +30,8 @@ import functools
__all__
=
[
'train'
,
'test'
,
'get_movie_title_dict'
,
'max_movie_id'
,
'max_user_id'
,
'age_table'
,
'movie_categories'
,
'max_job_id'
,
'user_info'
,
'movie_info'
'age_table'
,
'movie_categories'
,
'max_job_id'
,
'user_info'
,
'movie_info'
,
'convert'
]
age_table
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
...
...
python/paddle/v2/dataset/sentiment.py
浏览文件 @
e915aa9c
...
...
@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews
import
paddle.v2.dataset.common
__all__
=
[
'train'
,
'test'
,
'get_word_dict'
]
__all__
=
[
'train'
,
'test'
,
'get_word_dict'
,
'convert'
]
NUM_TRAINING_INSTANCES
=
1600
NUM_TOTAL_INSTANCES
=
2000
...
...
python/paddle/v2/dataset/uci_housing.py
浏览文件 @
e915aa9c
...
...
@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
MD5
=
'd4accdce7a25600298819f8e28e8d593'
feature_names
=
[
'CRIM'
,
'ZN'
,
'INDUS'
,
'CHAS'
,
'NOX'
,
'RM'
,
'AGE'
,
'DIS'
,
'RAD'
,
'TAX'
,
'PTRATIO'
,
'B'
,
'LSTAT'
'PTRATIO'
,
'B'
,
'LSTAT'
,
'convert'
]
UCI_TRAIN_DATA
=
None
...
...
python/paddle/v2/dataset/wmt14.py
浏览文件 @
e915aa9c
...
...
@@ -25,7 +25,7 @@ import gzip
import
paddle.v2.dataset.common
from
paddle.v2.parameters
import
Parameters
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
__all__
=
[
'train'
,
'test'
,
'build_dict'
,
'convert'
]
URL_DEV_TEST
=
'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST
=
'7d7897317ddd8ba0ae5c5fa7248d3ff5'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录