Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1b45847e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1b45847e
编写于
4月 23, 2020
作者:
L
lidanqing
提交者:
GitHub
4月 23, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add user local data preprocess support (#23692) (#24075)
上级
ccd2d06a
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
124 additions
and
15 deletions
+124
-15
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+20
-2
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
...uid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
+85
-3
paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
...uid/inference/tests/api/full_pascalvoc_test_preprocess.py
+19
-10
未找到文件。
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
1b45847e
...
...
@@ -93,6 +93,13 @@ function(inference_analysis_api_qat_test_run TARGET_NAME test_binary fp32_model_
--iterations=2
)
endfunction
()
# Register a py_test named `target` that runs the preprocessing script
# `py_script_source` (resolved relative to this CMakeLists.txt directory).
# The script is invoked with --data_dir=<data_dir>, --output_file=<output_file>
# and the --local flag.
function(preprocess_data2bin_test_run target py_script_source data_dir output_file)
    # Function-scoped helper variable holding the script path handed to py_test.
    set(preprocess_script ${CMAKE_CURRENT_SOURCE_DIR}/${py_script_source})
    py_test(${target}
            SRCS ${preprocess_script}
            ARGS --data_dir=${data_dir} --output_file=${output_file} --local)
endfunction()
if
(
NOT APPLE AND WITH_MKLML
)
# RNN1
set
(
RNN1_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/rnn1"
)
...
...
@@ -286,8 +293,6 @@ if(WITH_MKLDNN)
# download dataset if necessary
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_val_head_300.tar.gz"
)
# download small demo set of pascalvoc for testing local userdata preprocessing
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_small.tar.gz"
)
# build test binary to be used in subsequent tests
inference_analysis_api_test_build
(
${
INT8_OBJ_DETECT_TEST_APP
}
${
INT8_OBJ_DETECT_TEST_APP_SRC
}
)
...
...
@@ -320,6 +325,19 @@ if(WITH_MKLDNN)
set
(
MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC
"mkldnn_quantizer_config_tester.cc"
)
inference_analysis_api_test_build
(
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP
}
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC
}
)
inference_analysis_test_run
(
test_mkldnn_quantizer_config COMMAND
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP
}
)
# preprocess data2bin imagenet
download_int8_data
(
${
INT8_DATA_DIR
}
"imagenet_small.tar.gz"
)
set
(
IMAGENET_SMALL_DATA_DIR
"
${
INT8_DATA_DIR
}
/imagenet_small"
)
set
(
IMAGENET_SMALL_OUTPUT_FILE
"imagenet_small.bin"
)
preprocess_data2bin_test_run
(
preprocess_local_imagenet
"full_ILSVRC2012_val_preprocess.py"
${
IMAGENET_SMALL_DATA_DIR
}
${
IMAGENET_SMALL_OUTPUT_FILE
}
)
# preprocess data2bin pascalvoc
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_small.tar.gz"
)
set
(
PASCALVOC_SMALL_DATA_DIR
"
${
INT8_DATA_DIR
}
/pascalvoc_small"
)
set
(
PASCALVOC_SMALL_OUTPUT_FILE
"pascalvoc_small.bin"
)
preprocess_data2bin_test_run
(
preprocess_local_pascalvoc
"full_pascalvoc_test_preprocess.py"
${
PASCALVOC_SMALL_DATA_DIR
}
${
PASCALVOC_SMALL_OUTPUT_FILE
}
)
endif
()
# bert, max_len=20, embedding_dim=128
...
...
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
浏览文件 @
1b45847e
...
...
@@ -24,6 +24,7 @@ import math
from
paddle.dataset.common
import
download
import
tarfile
import
StringIO
import
argparse
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
...
...
@@ -131,7 +132,7 @@ def check_integrity(filename, target_hash):
return
False
def
convert
(
tar_file
,
output_file
):
def
convert
_Imagenet_tar2bin
(
tar_file
,
output_file
):
print
(
'Converting 50000 images to binary file ...
\n
'
)
tar
=
tarfile
.
open
(
name
=
tar_file
,
mode
=
'r:gz'
)
...
...
@@ -205,9 +206,90 @@ def run_convert():
"Can not convert the dataset to binary file with try limit {0}"
.
format
(
try_limit
))
download_concat
(
cache_folder
,
zip_path
)
convert
(
zip_path
,
output_file
)
convert
_Imagenet_tar2bin
(
zip_path
,
output_file
)
print
(
"
\n
Success! The binary file can be found at {0}"
.
format
(
output_file
))
def convert_Imagenet_local2bin(args):
    """Convert a local image directory plus label list into one binary file.

    The label list is read from ``args.data_dir/args.label_list``; each
    non-blank line must hold exactly two whitespace-separated fields:
    an image path relative to ``args.data_dir`` and an integer label.

    The output ``args.data_dir/args.output_file`` is laid out as:
    the image count (one int64), then one float32 image slot per entry,
    then one int64 label per entry. Entries whose image file does not
    exist are skipped, so their image and label slots are never written.

    Args:
        args: Parsed command-line namespace providing ``data_dir``,
            ``label_list``, ``output_file`` and ``data_dim``.

    Raises:
        AssertionError: If ``args.data_dir`` is empty.
        ValueError: If a non-blank label line does not have exactly two
            fields, or its label is not an integer.
    """
    data_dir = args.data_dir
    # Raised explicitly (instead of `assert`) so the check survives `python -O`,
    # and done before data_dir is used to build any path.
    if not data_dir:
        raise AssertionError(
            'Once set --local, user need to provide the --data_dir')

    label_list_path = os.path.join(data_dir, args.label_list)
    bin_file_path = os.path.join(data_dir, args.output_file)

    with open(label_list_path) as flist:
        stripped = [line.strip() for line in flist]
    # Blank lines (e.g. a trailing newline at the end of the list) carry no
    # entry; keeping them would break the two-field unpack below and inflate
    # the image count written to the header.
    lines = [line for line in stripped if line]
    num_images = len(lines)

    # Loop-invariant layout arithmetic.
    # NOTE(review): offsets are sized by the module constant DATA_DIM while the
    # size check below uses args.data_dim; they agree only when --data_dim
    # equals DATA_DIM -- confirm which one process_image() actually honours.
    image_bytes = SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
    labels_offset = SIZE_INT64 + image_bytes * num_images

    with open(bin_file_path, "w+b") as of:
        # Header: number of entries as int64.
        of.write(np.array(int(num_images)).astype('int64').tobytes())
        for idx, line in enumerate(lines):
            img_path, label = line.split()
            img_path = os.path.join(data_dir, img_path)
            if not os.path.exists(img_path):
                continue

            # Save image (float32) into its slot.
            img = Image.open(img_path)
            img = process_image(img)
            np_img = np.array(img)
            of.seek(SIZE_INT64 + image_bytes * idx)
            of.write(np_img.astype('float32').tobytes())

            # Save label (int64) into its slot after all image slots.
            np_label = np.array(int(label))
            of.seek(labels_offset + idx * SIZE_INT64)
            of.write(np_label.astype('int64').tobytes())

    # The bin file should contain
    # number of images + all images data + all corresponding labels
    # so the file target_size should be as follows
    target_size = (SIZE_INT64 +
                   num_images * 3 * args.data_dim * args.data_dim * SIZE_FLOAT32
                   + num_images * SIZE_INT64)
    if os.path.getsize(bin_file_path) == target_size:
        print("Success! The user data output binary file can be found at: {0}".
              format(bin_file_path))
    else:
        print("Conversion failed!")
def main_preprocess_Imagenet(args):
    """Command-line entry point for the Imagenet preprocessing script.

    Builds the argument parser, parses the process command line and then
    either converts user-local data (when ``--local`` is given) or runs the
    default full-dataset conversion.

    Args:
        args: Accepted for the caller's convenience but not consulted;
            ``parse_args()`` is called without arguments, so options are
            taken from ``sys.argv[1:]``.
    """
    cli = argparse.ArgumentParser(
        description="Convert the full Imagenet val set or local data to binary file.",
        usage=None,
        add_help=True)

    # (flag, keyword options) pairs, registered in this order.
    option_table = [
        ('--local', {
            'action': "store_true",
            'help': "If used, user need to set --data_dir and then convert file",
        }),
        ("--data_dir", {
            'default': "",
            'type': str,
            'help': "Dataset root directory",
        }),
        ("--label_list", {
            'type': str,
            'default': "val_list.txt",
            'help': "List of object labels with same sequence as denoted in the annotation file",
        }),
        ("--output_file", {
            'type': str,
            'default': "imagenet_small.bin",
            'help': "File path of the output binary file",
        }),
        ("--data_dim", {
            'type': int,
            'default': DATA_DIM,
            'help': "Image preprocess with data_dim width and height",
        }),
    ]
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args()

    # Without --local, fall back to the default conversion path.
    if not parsed.local:
        run_convert()
        return
    convert_Imagenet_local2bin(parsed)
if
__name__
==
'__main__'
:
run_convert
(
)
main_preprocess_Imagenet
(
sys
.
argv
)
paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
浏览文件 @
1b45847e
...
...
@@ -28,6 +28,8 @@ DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.t
DATA_DIR
=
os
.
path
.
expanduser
(
"~/.cache/paddle/dataset/pascalvoc/"
)
TAR_FILE
=
"VOCtest_06-Nov-2007.tar"
TAR_PATH
=
os
.
path
.
join
(
DATA_DIR
,
TAR_FILE
)
SIZE_FLOAT32
=
4
SIZE_INT64
=
8
RESIZE_H
=
300
RESIZE_W
=
300
MEAN_VALUE
=
[
127.5
,
127.5
,
127.5
]
...
...
@@ -60,6 +62,7 @@ def preprocess(img):
def
convert_pascalvoc_local2bin
(
args
):
data_dir
=
os
.
path
.
expanduser
(
args
.
data_dir
)
label_fpath
=
os
.
path
.
join
(
data_dir
,
args
.
label_file
)
assert
data_dir
,
'Once set --local, user need to provide the --data_dir'
flabel
=
open
(
label_fpath
)
label_list
=
[
line
.
strip
()
for
line
in
flabel
]
...
...
@@ -128,10 +131,14 @@ def convert_pascalvoc_local2bin(args):
f1
.
close
()
object_nums_sum
=
sum
(
object_nums
)
target_size
=
8
+
image_nums
*
3
*
args
.
resize_h
*
args
.
resize_h
*
4
+
image_nums
*
8
+
object_nums_sum
*
(
8
+
4
*
4
+
8
)
# The data should contain
# number of images + all images data + an array that represents the object numbers of each image
# + labels of all objects in images + bboxes of all objects + difficulties of all objects
# so the target size should be as follows:
target_size
=
SIZE_INT64
+
image_nums
*
3
*
args
.
resize_h
*
args
.
resize_h
*
SIZE_FLOAT32
+
image_nums
*
SIZE_INT64
+
object_nums_sum
*
(
SIZE_INT64
+
4
*
SIZE_FLOAT32
+
SIZE_INT64
)
if
(
os
.
path
.
getsize
(
output_file_path
)
==
target_size
):
print
(
"Success!
\n
The output binary file can be found at: "
,
print
(
"Success!
\n
The
local data
output binary file can be found at: "
,
output_file_path
)
else
:
print
(
"Conversion failed!"
)
...
...
@@ -223,6 +230,9 @@ def convert_pascalvoc_tar2bin(tar_path, data_out_path):
if
line_idx
%
per_percentage
:
print_processbar
(
line_idx
/
per_percentage
)
# The data should be stored in binary in following sequence:
# number of images->all images data->an array that represents the object numbers in each image
# ->labels of all objects in images->bboxes of all objects->difficulties of all objects
f1
.
write
(
np
.
array
(
object_nums
).
astype
(
'uint64'
).
tobytes
())
f1
.
write
(
np
.
array
(
lbls
).
astype
(
'int64'
).
tobytes
())
f1
.
write
(
np
.
array
(
boxes
).
astype
(
'float32'
).
tobytes
())
...
...
@@ -269,12 +279,11 @@ def main_pascalvoc_preprocess(args):
usage
=
None
,
add_help
=
True
)
parser
.
add_argument
(
'--choice'
,
choices
=
[
'local'
,
'VOC_test_2007'
],
required
=
True
)
'--local'
,
action
=
"store_true"
,
help
=
"If used, user need to set --data_dir and then convert file"
)
parser
.
add_argument
(
"--data_dir"
,
default
=
"./third_party/inference_demo/int8v2/pascalvoc_small"
,
type
=
str
,
help
=
"Dataset root directory"
)
"--data_dir"
,
default
=
""
,
type
=
str
,
help
=
"Dataset root directory"
)
parser
.
add_argument
(
"--img_annotation_list"
,
type
=
str
,
...
...
@@ -313,9 +322,9 @@ def main_pascalvoc_preprocess(args):
default
=
AP_VERSION
,
help
=
"Image preprocess with ap_version"
)
args
=
parser
.
parse_args
()
if
args
.
choice
==
'local'
:
if
args
.
local
:
convert_pascalvoc_local2bin
(
args
)
el
if
args
.
choice
==
'VOC_test_2007'
:
el
se
:
run_convert
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录