Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0d656996
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0d656996
编写于
3月 28, 2019
作者:
L
lidanqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bugs of unzip and reading val list
test=develop
上级
b46e467a
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
46 addition
and
37 deletion
+46
-37
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
...uid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
+46
-37
未找到文件。
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
浏览文件 @
0d656996
...
@@ -71,10 +71,14 @@ def process_image(img_path, mode, color_jitter, rotate):
...
@@ -71,10 +71,14 @@ def process_image(img_path, mode, color_jitter, rotate):
def
download_unzip
():
def
download_unzip
():
int8_download
=
'int8/download'
t
mp_folder
=
'int8/download
'
t
arget_name
=
'data
'
cache_folder
=
os
.
path
.
expanduser
(
'~/.cache/'
+
tmp_folder
)
cache_folder
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset/'
+
int8_download
)
target_folder
=
os
.
path
.
join
(
cache_folder
,
target_name
)
data_urls
=
[]
data_urls
=
[]
data_md5s
=
[]
data_md5s
=
[]
...
@@ -89,8 +93,9 @@ def download_unzip():
...
@@ -89,8 +93,9 @@ def download_unzip():
data_md5s
.
append
(
'1e9f15f64e015e58d6f9ec3210ed18b5'
)
data_md5s
.
append
(
'1e9f15f64e015e58d6f9ec3210ed18b5'
)
file_names
=
[]
file_names
=
[]
for
i
in
range
(
0
,
len
(
data_urls
)):
for
i
in
range
(
0
,
len
(
data_urls
)):
download
(
data_urls
[
i
],
tmp
_folder
,
data_md5s
[
i
])
download
(
data_urls
[
i
],
cache
_folder
,
data_md5s
[
i
])
file_names
.
append
(
data_urls
[
i
].
split
(
'/'
)[
-
1
])
file_names
.
append
(
data_urls
[
i
].
split
(
'/'
)[
-
1
])
zip_path
=
os
.
path
.
join
(
cache_folder
,
'full_imagenet_val.tar.gz'
)
zip_path
=
os
.
path
.
join
(
cache_folder
,
'full_imagenet_val.tar.gz'
)
...
@@ -101,16 +106,15 @@ def download_unzip():
...
@@ -101,16 +106,15 @@ def download_unzip():
cat_command
+=
' '
+
os
.
path
.
join
(
cache_folder
,
file_name
)
cat_command
+=
' '
+
os
.
path
.
join
(
cache_folder
,
file_name
)
cat_command
+=
' > '
+
zip_path
cat_command
+=
' > '
+
zip_path
os
.
system
(
cat_command
)
os
.
system
(
cat_command
)
print
(
'Data is downloaded at {0}
\n
'
).
format
(
zip_path
)
if
not
os
.
path
.
exists
(
cache_folder
):
if
not
os
.
path
.
exists
(
target_folder
):
cmd
=
'mkdir {0} && tar xf {1} -C {0}'
.
format
(
cache_folder
,
zip_path
)
cmd
=
'mkdir {0} && tar xf {1} -C {0}'
.
format
(
target_folder
,
zip_path
)
os
.
system
(
cmd
)
cmd
=
'rm -rf {3} && ln -s {1} {0}'
.
format
(
"data"
,
cache_folder
,
zip_path
)
print
(
'Data is unzipped at {0}
\n
'
.
format
(
target_folder
))
os
.
system
(
cmd
)
data_dir
=
os
.
path
.
expanduser
(
cache_folder
+
'data'
)
data_dir
=
os
.
path
.
join
(
target_folder
,
'ILSVRC2012'
)
print
(
'ILSVRC2012 full val set at {0}
\n
'
.
format
(
data_dir
))
return
data_dir
return
data_dir
...
@@ -121,32 +125,37 @@ def reader():
...
@@ -121,32 +125,37 @@ def reader():
with
open
(
file_list
)
as
flist
:
with
open
(
file_list
)
as
flist
:
lines
=
[
line
.
strip
()
for
line
in
flist
]
lines
=
[
line
.
strip
()
for
line
in
flist
]
num_images
=
len
(
lines
)
num_images
=
len
(
lines
)
if
not
os
.
path
.
exists
(
output_file
):
with
open
(
output_file
,
"w+b"
)
as
of
:
print
(
#save num_images(int64_t) to file
'Preprocessing to binary file...<num_images><all images><all labels>...
\n
'
of
.
seek
(
0
)
)
num
=
np
.
array
(
int
(
num_images
)).
astype
(
'int64'
)
with
open
(
output_file
,
"w+b"
)
as
of
:
of
.
write
(
num
.
tobytes
())
#save num_images(int64_t) to file
for
idx
,
line
in
enumerate
(
lines
):
of
.
seek
(
0
)
img_path
,
label
=
line
.
split
()
num
=
np
.
array
(
int
(
num_images
)).
astype
(
'int64'
)
img_path
=
os
.
path
.
join
(
data_dir
,
img_path
)
of
.
write
(
num
.
tobytes
())
if
not
os
.
path
.
exists
(
img_path
):
for
idx
,
line
in
enumerate
(
lines
):
continue
img_path
,
label
=
line
.
split
()
img_path
=
os
.
path
.
join
(
data_dir
,
img_path
)
#save image(float32) to file
if
not
os
.
path
.
exists
(
img_path
):
img
=
process_image
(
continue
img_path
,
'val'
,
color_jitter
=
False
,
rotate
=
False
)
np_img
=
np
.
array
(
img
)
#save image(float32) to file
of
.
seek
(
SIZE_INT64
+
SIZE_FLOAT32
*
DATA_DIM
*
DATA_DIM
*
3
*
img
=
process_image
(
idx
)
img_path
,
'val'
,
color_jitter
=
False
,
rotate
=
False
)
of
.
write
(
np_img
.
astype
(
'float32'
).
tobytes
())
np_img
=
np
.
array
(
img
)
of
.
seek
(
SIZE_INT64
+
SIZE_FLOAT32
*
DATA_DIM
*
DATA_DIM
*
3
#save label(int64_t) to file
*
idx
)
label_int
=
(
int
)(
label
)
of
.
write
(
np_img
.
astype
(
'float32'
).
tobytes
())
np_label
=
np
.
array
(
label_int
)
of
.
seek
(
SIZE_INT64
+
SIZE_FLOAT32
*
DATA_DIM
*
DATA_DIM
*
3
*
#save label(int64_t) to file
num_images
+
idx
*
SIZE_INT64
)
label_int
=
(
int
)(
label
)
of
.
write
(
np_label
.
astype
(
'int64'
).
tobytes
())
np_label
=
np
.
array
(
label_int
)
of
.
seek
(
SIZE_INT64
+
SIZE_FLOAT32
*
DATA_DIM
*
DATA_DIM
*
3
*
num_images
+
idx
*
SIZE_INT64
)
of
.
write
(
np_label
.
astype
(
'int64'
).
tobytes
())
print
(
'The preprocessed binary file path {}
\n
'
.
format
(
output_file
))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录