Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
bfc33108
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bfc33108
编写于
3月 24, 2017
作者:
Y
Yancey
提交者:
GitHub
3月 24, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1687 from Yancey1989/dataset_cache_api
Add download api for dataset
上级
0b59be2b
14eb5b8e
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
68 additions
and
19 deletions
+68
-19
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+10
-10
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+12
-0
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+8
-0
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+4
-0
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+4
-0
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+7
-0
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+8
-4
python/paddle/v2/dataset/sentiment.py
python/paddle/v2/dataset/sentiment.py
+4
-0
python/paddle/v2/dataset/uci_housing.py
python/paddle/v2/dataset/uci_housing.py
+4
-0
python/paddle/v2/dataset/wmt14.py
python/paddle/v2/dataset/wmt14.py
+7
-5
未找到文件。
python/paddle/v2/dataset/cifar.py
浏览文件 @
bfc33108
...
...
@@ -20,7 +20,7 @@ TODO(yuyang18): Complete the comments.
import
cPickle
import
itertools
import
numpy
import
paddle.v2.dataset.common
from
common
import
download
import
tarfile
__all__
=
[
'train100'
,
'test100'
,
'train10'
,
'test10'
]
...
...
@@ -55,23 +55,23 @@ def reader_creator(filename, sub_name):
def
train100
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'train'
)
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'train'
)
def
test100
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'test'
)
return
reader_creator
(
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
),
'test'
)
def
train10
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'data_batch'
)
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'data_batch'
)
def
test10
():
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'test_batch'
)
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
),
'test_batch'
)
def fetch():
    """Call ``download`` for the CIFAR-10 and then the CIFAR-100 archive.

    Both calls use the module name 'cifar'; the values returned by
    ``download`` are discarded.
    """
    archives = (
        (CIFAR10_URL, CIFAR10_MD5),
        (CIFAR100_URL, CIFAR100_MD5),
    )
    for url, md5sum in archives:
        download(url, 'cifar', md5sum)
python/paddle/v2/dataset/common.py
浏览文件 @
bfc33108
...
...
@@ -17,6 +17,8 @@ import hashlib
import
os
import
shutil
import
sys
import
importlib
import
paddle.v2.dataset
__all__
=
[
'DATA_HOME'
,
'download'
,
'md5file'
]
...
...
@@ -69,3 +71,13 @@ def dict_add(a_dict, ele):
a_dict
[
ele
]
+=
1
else
:
a_dict
[
ele
]
=
1
def fetch_all():
    """Call ``fetch()`` on every dataset module that provides one.

    Each name in ``dir(paddle.v2.dataset)`` that does not start with a
    double underscore is imported as ``paddle.v2.dataset.<name>``.  When
    ``dir()`` of the imported module lists ``fetch``, that attribute is
    called with no arguments.
    """
    public_names = [
        name for name in dir(paddle.v2.dataset) if not name.startswith("__")
    ]
    for module_name in public_names:
        module = importlib.import_module("paddle.v2.dataset.%s" % module_name)
        if "fetch" not in dir(module):
            continue
        getattr(module, "fetch")()
python/paddle/v2/dataset/conll05.py
浏览文件 @
bfc33108
...
...
@@ -196,3 +196,11 @@ def test():
words_name
=
'conll05st-release/test.wsj/words/test.wsj.words.gz'
,
props_name
=
'conll05st-release/test.wsj/props/test.wsj.props.gz'
)
return
reader_creator
(
reader
,
word_dict
,
verb_dict
,
label_dict
)
def fetch():
    """Call ``download`` for every conll05st resource, in a fixed order.

    The order is: word dictionary, verb dictionary, target dictionary,
    embedding, data.  Every call uses the module name 'conll05st'; the
    values returned by ``download`` are discarded.
    """
    resources = (
        (WORDDICT_URL, WORDDICT_MD5),
        (VERBDICT_URL, VERBDICT_MD5),
        (TRGDICT_URL, TRGDICT_MD5),
        (EMB_URL, EMB_MD5),
        (DATA_URL, DATA_MD5),
    )
    for url, md5sum in resources:
        download(url, 'conll05st', md5sum)
python/paddle/v2/dataset/imdb.py
浏览文件 @
bfc33108
...
...
@@ -123,3 +123,7 @@ def test(word_idx):
def word_dict():
    """Return ``build_dict`` applied to the aclImdb review-file pattern.

    The pattern matches ``.txt`` paths under the train/test and pos/neg
    directories of ``aclImdb``.  The second argument, 150, is passed
    through unchanged (presumably a frequency cutoff -- confirm against
    ``build_dict``).
    """
    review_pattern = re.compile(
        "aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$")
    return build_dict(review_pattern, 150)
def fetch():
    """Call the common ``download`` helper for ``URL`` under 'imdb'.

    ``MD5`` is passed as the third argument; the return value is discarded.
    """
    fetch_resource = paddle.v2.dataset.common.download
    fetch_resource(URL, 'imdb', MD5)
python/paddle/v2/dataset/imikolov.py
浏览文件 @
bfc33108
...
...
@@ -89,3 +89,7 @@ def train(word_idx, n):
def test(word_idx, n):
    """Return ``reader_creator`` for the PTB validation text file.

    ``word_idx`` and ``n`` are forwarded to ``reader_creator`` unchanged.
    """
    valid_path = './simple-examples/data/ptb.valid.txt'
    return reader_creator(valid_path, word_idx, n)
def fetch():
    """Call the common ``download`` helper for ``URL`` under "imikolov".

    ``MD5`` is passed as the third argument; the return value is discarded.
    """
    fetch_resource = paddle.v2.dataset.common.download
    fetch_resource(URL, "imikolov", MD5)
python/paddle/v2/dataset/mnist.py
浏览文件 @
bfc33108
...
...
@@ -106,3 +106,10 @@ def test():
TEST_IMAGE_MD5
),
paddle
.
v2
.
dataset
.
common
.
download
(
TEST_LABEL_URL
,
'mnist'
,
TEST_LABEL_MD5
),
100
)
def fetch():
    """Call the common ``download`` helper for all four MNIST files.

    The order is: training images, training labels, test images, test
    labels.  Every call uses the module name 'mnist'; the values returned
    by ``download`` are discarded.
    """
    resources = (
        (TRAIN_IMAGE_URL, TRAIN_IMAGE_MD5),
        (TRAIN_LABEL_URL, TRAIN_LABEL_MD5),
        (TEST_IMAGE_URL, TEST_IMAGE_MD5),
        # The test-label file must be paired with TEST_LABEL_MD5, as in
        # test(); it was previously paired with TRAIN_LABEL_MD5.
        (TEST_LABEL_URL, TEST_LABEL_MD5),
    )
    for url, md5sum in resources:
        paddle.v2.dataset.common.download(url, 'mnist', md5sum)
python/paddle/v2/dataset/movielens.py
浏览文件 @
bfc33108
...
...
@@ -30,6 +30,9 @@ __all__ = [
age_table
=
[
1
,
18
,
25
,
35
,
45
,
50
,
56
]
URL
=
'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
MD5
=
'c4d9eecfca2ab87c1945afe126590906'
class
MovieInfo
(
object
):
def
__init__
(
self
,
index
,
categories
,
title
):
...
...
@@ -77,10 +80,7 @@ USER_INFO = None
def
__initialize_meta_info__
():
fn
=
download
(
url
=
'http://files.grouplens.org/datasets/movielens/ml-1m.zip'
,
module_name
=
'movielens'
,
md5sum
=
'c4d9eecfca2ab87c1945afe126590906'
)
fn
=
download
(
URL
,
"movielens"
,
MD5
)
global
MOVIE_INFO
if
MOVIE_INFO
is
None
:
pattern
=
re
.
compile
(
r
'^(.*)\((\d+)\)$'
)
...
...
@@ -205,5 +205,9 @@ def unittest():
print
train_count
,
test_count
def fetch():
    """Call ``download`` for ``URL`` under the module name "movielens".

    ``MD5`` is passed as the third argument; the return value is discarded.
    """
    archive_url, archive_md5 = URL, MD5
    download(archive_url, "movielens", archive_md5)
if
__name__
==
'__main__'
:
unittest
()
python/paddle/v2/dataset/sentiment.py
浏览文件 @
bfc33108
...
...
@@ -125,3 +125,7 @@ def test():
"""
data_set
=
load_sentiment_data
()
return
reader_creator
(
data_set
[
NUM_TRAINING_INSTANCES
:])
def fetch():
    """Ask NLTK to download 'movie_reviews' into ``common.DATA_HOME``."""
    target_dir = common.DATA_HOME
    nltk.download('movie_reviews', download_dir=target_dir)
python/paddle/v2/dataset/uci_housing.py
浏览文件 @
bfc33108
...
...
@@ -89,3 +89,7 @@ def test():
yield
d
[:
-
1
],
d
[
-
1
:]
return
reader
def fetch():
    """Call ``download`` for ``URL`` under the module name 'uci_housing'.

    ``MD5`` is passed as the third argument; the return value is discarded.
    """
    data_url, data_md5 = URL, MD5
    download(data_url, 'uci_housing', data_md5)
python/paddle/v2/dataset/wmt14.py
浏览文件 @
bfc33108
...
...
@@ -16,7 +16,7 @@ wmt14 dataset
"""
import
tarfile
import
paddle.v2.dataset.common
from
paddle.v2.dataset.common
import
download
__all__
=
[
'train'
,
'test'
,
'build_dict'
]
...
...
@@ -95,11 +95,13 @@ def reader_creator(tar_file, file_name, dict_size):
def
train
(
dict_size
):
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
),
'train/train'
,
dict_size
)
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
),
'train/train'
,
dict_size
)
def
test
(
dict_size
):
return
reader_creator
(
paddle
.
v2
.
dataset
.
common
.
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
),
'test/test'
,
dict_size
)
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
),
'test/test'
,
dict_size
)
def fetch():
    """Call ``download`` for ``URL_TRAIN`` under the module name 'wmt14'.

    ``MD5_TRAIN`` is passed as the third argument; the return value is
    discarded.
    """
    archive_url, archive_md5 = URL_TRAIN, MD5_TRAIN
    download(archive_url, 'wmt14', archive_md5)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录