Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1b45847e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1b45847e
编写于
4月 23, 2020
作者:
L
lidanqing
提交者:
GitHub
4月 23, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add user local data preprocess support (#23692) (#24075)
上级
ccd2d06a
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
124 addition
and
15 deletion
+124
-15
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+20
-2
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
...uid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
+85
-3
paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
...uid/inference/tests/api/full_pascalvoc_test_preprocess.py
+19
-10
未找到文件。
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
1b45847e
...
...
@@ -93,6 +93,13 @@ function(inference_analysis_api_qat_test_run TARGET_NAME test_binary fp32_model_
--iterations=2
)
endfunction
()
# Register a py_test named <target> that runs the preprocessing script
# <py_script_source> (resolved relative to this directory) in --local mode,
# reading from <data_dir> and writing the binary to <output_file>.
function(preprocess_data2bin_test_run target py_script_source data_dir output_file)
    py_test(${target}
            SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${py_script_source}
            ARGS --data_dir=${data_dir}
                 --output_file=${output_file}
                 --local)
endfunction()
if
(
NOT APPLE AND WITH_MKLML
)
# RNN1
set
(
RNN1_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/rnn1"
)
...
...
@@ -286,8 +293,6 @@ if(WITH_MKLDNN)
# download dataset if necessary
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_val_head_300.tar.gz"
)
# download small demo set of pascalvoc for testing local userdata preprocessing
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_small.tar.gz"
)
# build test binary to be used in subsequent tests
inference_analysis_api_test_build
(
${
INT8_OBJ_DETECT_TEST_APP
}
${
INT8_OBJ_DETECT_TEST_APP_SRC
}
)
...
...
@@ -320,6 +325,19 @@ if(WITH_MKLDNN)
set
(
MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC
"mkldnn_quantizer_config_tester.cc"
)
inference_analysis_api_test_build
(
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP
}
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC
}
)
inference_analysis_test_run
(
test_mkldnn_quantizer_config COMMAND
${
MKLDNN_QUANTIZER_CONFIG_TEST_APP
}
)
# preprocess data2bin imagenet
download_int8_data
(
${
INT8_DATA_DIR
}
"imagenet_small.tar.gz"
)
set
(
IMAGENET_SMALL_DATA_DIR
"
${
INT8_DATA_DIR
}
/imagenet_small"
)
set
(
IMAGENET_SMALL_OUTPUT_FILE
"imagenet_small.bin"
)
preprocess_data2bin_test_run
(
preprocess_local_imagenet
"full_ILSVRC2012_val_preprocess.py"
${
IMAGENET_SMALL_DATA_DIR
}
${
IMAGENET_SMALL_OUTPUT_FILE
}
)
# preprocess data2bin pascalvoc
download_int8_data
(
${
INT8_DATA_DIR
}
"pascalvoc_small.tar.gz"
)
set
(
PASCALVOC_SMALL_DATA_DIR
"
${
INT8_DATA_DIR
}
/pascalvoc_small"
)
set
(
PASCALVOC_SMALL_OUTPUT_FILE
"pascalvoc_small.bin"
)
preprocess_data2bin_test_run
(
preprocess_local_pascalvoc
"full_pascalvoc_test_preprocess.py"
${
PASCALVOC_SMALL_DATA_DIR
}
${
PASCALVOC_SMALL_OUTPUT_FILE
}
)
endif
()
# bert, max_len=20, embedding_dim=128
...
...
paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
浏览文件 @
1b45847e
...
...
@@ -24,6 +24,7 @@ import math
from
paddle.dataset.common
import
download
import
tarfile
import
StringIO
import
argparse
random
.
seed
(
0
)
np
.
random
.
seed
(
0
)
...
...
@@ -131,7 +132,7 @@ def check_integrity(filename, target_hash):
return
False
def
convert
(
tar_file
,
output_file
):
def
convert
_Imagenet_tar2bin
(
tar_file
,
output_file
):
print
(
'Converting 50000 images to binary file ...
\n
'
)
tar
=
tarfile
.
open
(
name
=
tar_file
,
mode
=
'r:gz'
)
...
...
@@ -205,9 +206,90 @@ def run_convert():
"Can not convert the dataset to binary file with try limit {0}"
.
format
(
try_limit
))
download_concat
(
cache_folder
,
zip_path
)
convert
(
zip_path
,
output_file
)
convert
_Imagenet_tar2bin
(
zip_path
,
output_file
)
print
(
"
\n
Success! The binary file can be found at {0}"
.
format
(
output_file
))
def convert_Imagenet_local2bin(args):
    """Convert a local image/label list into a single binary file.

    The output file is written as:
        [int64 number of images]
        [float32 image data, one slot per list entry, in list order]
        [int64 label, one per list entry, in list order]

    Args:
        args: parsed command-line namespace. Reads ``data_dir`` (dataset
            root), ``label_list`` (file name, relative to ``data_dir``, with
            one ``<image path> <label>`` pair per line), ``output_file``
            (file name, relative to ``data_dir``) and ``data_dim``.

    Raises:
        ValueError: if ``args.data_dir`` is empty.

    Entries whose image file does not exist are skipped; their image and
    label slots in the output are left unwritten.
    """
    data_dir = args.data_dir
    # Validate before the directory is used to build any path.
    if not data_dir:
        raise ValueError(
            'Once set --local, user need to provide the --data_dir')

    label_list_path = os.path.join(data_dir, args.label_list)
    bin_file_path = os.path.join(data_dir, args.output_file)

    with open(label_list_path) as flist:
        # Blank lines are not entries: counting them would inflate the image
        # count and break the "<path> <label>" unpacking below.
        lines = [line.strip() for line in flist if line.strip()]
    num_images = len(lines)

    # Byte size of one image slot and start of the label section.
    # NOTE(review): offsets use the module-level DATA_DIM while the size check
    # below uses args.data_dim; they only agree when --data_dim is left at its
    # default. Confirm against process_image before unifying them.
    image_bytes = SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
    labels_offset = SIZE_INT64 + image_bytes * num_images

    with open(bin_file_path, "w+b") as of:
        of.write(np.array(int(num_images)).astype('int64').tobytes())

        for idx, line in enumerate(lines):
            img_path, label = line.split()
            img_path = os.path.join(data_dir, img_path)
            if not os.path.exists(img_path):
                continue

            # save image(float32) to file
            with Image.open(img_path) as img:
                np_img = np.array(process_image(img))
            of.seek(SIZE_INT64 + image_bytes * idx)
            of.write(np_img.astype('float32').tobytes())

            # save label(int64_t) to file
            np_label = np.array(int(label))
            of.seek(labels_offset + idx * SIZE_INT64)
            of.write(np_label.astype('int64').tobytes())

    # The bin file should contain
    # number of images + all images data + all corresponding labels
    # so the file target_size should be as follows
    target_size = (SIZE_INT64 +
                   num_images * 3 * args.data_dim * args.data_dim *
                   SIZE_FLOAT32 + num_images * SIZE_INT64)
    if os.path.getsize(bin_file_path) == target_size:
        print("Success! The user data output binary file can be found at: {0}".
              format(bin_file_path))
    else:
        print("Conversion failed!")
def main_preprocess_Imagenet(args):
    """Parse command-line options and run the requested conversion.

    Args:
        args: full argument vector in ``sys.argv`` form, i.e. the first
            element is the program name and is not parsed as an option.
            ``None`` falls back to the process's ``sys.argv``.

    With ``--local`` the user-provided dataset is converted through
    ``convert_Imagenet_local2bin``; otherwise ``run_convert`` is called.
    """
    parser = argparse.ArgumentParser(
        description="Convert the full Imagenet val set or local data to binary file.",
        usage=None,
        add_help=True)
    parser.add_argument(
        '--local',
        action="store_true",
        help="If used, user need to set --data_dir and then convert file")
    parser.add_argument(
        "--data_dir", default="", type=str, help="Dataset root directory")
    parser.add_argument(
        "--label_list",
        type=str,
        default="val_list.txt",
        help="List of object labels with same sequence as denoted in the annotation file"
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default="imagenet_small.bin",
        help="File path of the output binary file")
    parser.add_argument(
        "--data_dim",
        type=int,
        default=DATA_DIM,
        help="Image preprocess with data_dim width and height")

    # Honour the argument vector handed in by the caller instead of silently
    # re-reading sys.argv; the leading program name is dropped.
    argv = None if args is None else list(args)[1:]
    parsed_args = parser.parse_args(argv)

    if parsed_args.local:
        convert_Imagenet_local2bin(parsed_args)
    else:
        run_convert()
if
__name__
==
'__main__'
:
run_convert
(
)
main_preprocess_Imagenet
(
sys
.
argv
)
paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
浏览文件 @
1b45847e
...
...
@@ -28,6 +28,8 @@ DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.t
DATA_DIR
=
os
.
path
.
expanduser
(
"~/.cache/paddle/dataset/pascalvoc/"
)
TAR_FILE
=
"VOCtest_06-Nov-2007.tar"
TAR_PATH
=
os
.
path
.
join
(
DATA_DIR
,
TAR_FILE
)
SIZE_FLOAT32
=
4
SIZE_INT64
=
8
RESIZE_H
=
300
RESIZE_W
=
300
MEAN_VALUE
=
[
127.5
,
127.5
,
127.5
]
...
...
@@ -60,6 +62,7 @@ def preprocess(img):
def
convert_pascalvoc_local2bin
(
args
):
data_dir
=
os
.
path
.
expanduser
(
args
.
data_dir
)
label_fpath
=
os
.
path
.
join
(
data_dir
,
args
.
label_file
)
assert
data_dir
,
'Once set --local, user need to provide the --data_dir'
flabel
=
open
(
label_fpath
)
label_list
=
[
line
.
strip
()
for
line
in
flabel
]
...
...
@@ -128,10 +131,14 @@ def convert_pascalvoc_local2bin(args):
f1
.
close
()
object_nums_sum
=
sum
(
object_nums
)
target_size
=
8
+
image_nums
*
3
*
args
.
resize_h
*
args
.
resize_h
*
4
+
image_nums
*
8
+
object_nums_sum
*
(
8
+
4
*
4
+
8
)
# The data should contain
# number of images + all images data + an array that represents the number of objects in each image
# + labels of all objects in images + bboxes of all objects + difficulties of all objects
# so the target size should be as follows:
target_size
=
SIZE_INT64
+
image_nums
*
3
*
args
.
resize_h
*
args
.
resize_h
*
SIZE_FLOAT32
+
image_nums
*
SIZE_INT64
+
object_nums_sum
*
(
SIZE_INT64
+
4
*
SIZE_FLOAT32
+
SIZE_INT64
)
if
(
os
.
path
.
getsize
(
output_file_path
)
==
target_size
):
print
(
"Success!
\n
The output binary file can be found at: "
,
print
(
"Success!
\n
The
local data
output binary file can be found at: "
,
output_file_path
)
else
:
print
(
"Conversion failed!"
)
...
...
@@ -223,6 +230,9 @@ def convert_pascalvoc_tar2bin(tar_path, data_out_path):
if
line_idx
%
per_percentage
:
print_processbar
(
line_idx
/
per_percentage
)
# The data should be stored in binary in the following sequence:
# number of images->all images data->an array that represents the number of objects in each image
# ->labels of all objects in images->bboxes of all objects->difficulties of all objects
f1
.
write
(
np
.
array
(
object_nums
).
astype
(
'uint64'
).
tobytes
())
f1
.
write
(
np
.
array
(
lbls
).
astype
(
'int64'
).
tobytes
())
f1
.
write
(
np
.
array
(
boxes
).
astype
(
'float32'
).
tobytes
())
...
...
@@ -269,12 +279,11 @@ def main_pascalvoc_preprocess(args):
usage
=
None
,
add_help
=
True
)
parser
.
add_argument
(
'--choice'
,
choices
=
[
'local'
,
'VOC_test_2007'
],
required
=
True
)
'--local'
,
action
=
"store_true"
,
help
=
"If used, user need to set --data_dir and then convert file"
)
parser
.
add_argument
(
"--data_dir"
,
default
=
"./third_party/inference_demo/int8v2/pascalvoc_small"
,
type
=
str
,
help
=
"Dataset root directory"
)
"--data_dir"
,
default
=
""
,
type
=
str
,
help
=
"Dataset root directory"
)
parser
.
add_argument
(
"--img_annotation_list"
,
type
=
str
,
...
...
@@ -313,9 +322,9 @@ def main_pascalvoc_preprocess(args):
default
=
AP_VERSION
,
help
=
"Image preprocess with ap_version"
)
args
=
parser
.
parse_args
()
if
args
.
choice
==
'local'
:
if
args
.
local
:
convert_pascalvoc_local2bin
(
args
)
el
if
args
.
choice
==
'VOC_test_2007'
:
el
se
:
run_convert
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录