Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
283e82f4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
283e82f4
编写于
2月 27, 2017
作者:
W
wangkuiyi
提交者:
GitHub
2月 27, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1450 from reyoung/feature/cifar_dataset
Feature/cifar dataset
上级
2f604064
0bcc4d48
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
109 addition
and
0 deletion
+109
-0
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+109
-0
未找到文件。
python/paddle/v2/dataset/cifar.py
0 → 100644
浏览文件 @
283e82f4
"""
CIFAR Dataset.
URL: https://www.cs.toronto.edu/~kriz/cifar.html
the default train_creator, test_creator used for CIFAR-10 dataset.
"""
from
config
import
DATA_HOME
import
os
import
hashlib
import
urllib2
import
shutil
import
tarfile
import
cPickle
import
itertools
import
numpy
__all__
=
[
'cifar_100_train_creator'
,
'cifar_100_test_creator'
,
'train_creator'
,
'test_creator'
]
CIFAR10_URL
=
'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
CIFAR10_MD5
=
'c58f30108f718f92721af3b95e74349a'
CIFAR100_URL
=
'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
CIFAR100_MD5
=
'eb9058c3a382ffc7106e4002c42a8d85'
def
__read_batch__
(
filename
,
sub_name
):
def
reader
():
def
__read_one_batch_impl__
(
batch
):
data
=
batch
[
'data'
]
labels
=
batch
.
get
(
'labels'
,
batch
.
get
(
'fine_labels'
,
None
))
assert
labels
is
not
None
for
sample
,
label
in
itertools
.
izip
(
data
,
labels
):
yield
(
sample
/
255.0
).
astype
(
numpy
.
float32
),
int
(
label
)
with
tarfile
.
open
(
filename
,
mode
=
'r'
)
as
f
:
names
=
(
each_item
.
name
for
each_item
in
f
if
sub_name
in
each_item
.
name
)
for
name
in
names
:
batch
=
cPickle
.
load
(
f
.
extractfile
(
name
))
for
item
in
__read_one_batch_impl__
(
batch
):
yield
item
return
reader
def
download
(
url
,
md5
):
filename
=
os
.
path
.
split
(
url
)[
-
1
]
assert
DATA_HOME
is
not
None
filepath
=
os
.
path
.
join
(
DATA_HOME
,
md5
)
if
not
os
.
path
.
exists
(
filepath
):
os
.
makedirs
(
filepath
)
__full_file__
=
os
.
path
.
join
(
filepath
,
filename
)
def
__file_ok__
():
if
not
os
.
path
.
exists
(
__full_file__
):
return
False
md5_hash
=
hashlib
.
md5
()
with
open
(
__full_file__
,
'rb'
)
as
f
:
for
chunk
in
iter
(
lambda
:
f
.
read
(
4096
),
b
""
):
md5_hash
.
update
(
chunk
)
return
md5_hash
.
hexdigest
()
==
md5
while
not
__file_ok__
():
response
=
urllib2
.
urlopen
(
url
)
with
open
(
__full_file__
,
mode
=
'wb'
)
as
of
:
shutil
.
copyfileobj
(
fsrc
=
response
,
fdst
=
of
)
return
__full_file__
def
cifar_100_train_creator
():
fn
=
download
(
url
=
CIFAR100_URL
,
md5
=
CIFAR100_MD5
)
return
__read_batch__
(
fn
,
'train'
)
def
cifar_100_test_creator
():
fn
=
download
(
url
=
CIFAR100_URL
,
md5
=
CIFAR100_MD5
)
return
__read_batch__
(
fn
,
'test'
)
def
train_creator
():
"""
Default train reader creator. Use CIFAR-10 dataset.
"""
fn
=
download
(
url
=
CIFAR10_URL
,
md5
=
CIFAR10_MD5
)
return
__read_batch__
(
fn
,
'data_batch'
)
def
test_creator
():
"""
Default test reader creator. Use CIFAR-10 dataset.
"""
fn
=
download
(
url
=
CIFAR10_URL
,
md5
=
CIFAR10_MD5
)
return
__read_batch__
(
fn
,
'test_batch'
)
def
unittest
():
for
_
in
train_creator
()():
pass
for
_
in
test_creator
()():
pass
if
__name__
==
'__main__'
:
unittest
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录