提交 9240e532 编写于 作者: L lidanqing 提交者: Tao Luo

add local user data conversion into full_pascalvoc_test_preprocess.py (#19283)

* add local user data conversion into full_pascalvoc_test_preprocess.py
test=develop

* change PADDLE_ENFORCE to PADDLE_ENFORCE_GE
test=develop

* change according to reviews
test=develop
上级 2e3ee579
...@@ -317,7 +317,7 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid, ...@@ -317,7 +317,7 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1, double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type = const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) { framework::proto::VarType::FP32) {
PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size."); PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
double sample_latency = batch_latency / batch_size; double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======"; << " ======";
......
...@@ -250,6 +250,9 @@ if(WITH_MKLDNN) ...@@ -250,6 +250,9 @@ if(WITH_MKLDNN)
# download dataset if necessary # download dataset if necessary
download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz") download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz")
# download small demo set of pascalvoc for testing local userdata preprocessing
download_int8_data(${INT8_DATA_DIR} "pascalvoc_small.tar.gz")
# build test binary to be used in subsequent tests # build test binary to be used in subsequent tests
inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC}) inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC})
......
...@@ -144,8 +144,8 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData( ...@@ -144,8 +144,8 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
int32_t num_images = FLAGS_warmup_batch_size) { int32_t num_images = FLAGS_warmup_batch_size) {
int test_data_batch_size = test_data[0][0].shape[0]; int test_data_batch_size = test_data[0][0].shape[0];
auto iterations = test_data.size(); auto iterations = test_data.size();
PADDLE_ENFORCE( PADDLE_ENFORCE_LE(
static_cast<size_t>(num_images) <= iterations * test_data_batch_size, static_cast<size_t>(num_images), iterations * test_data_batch_size,
"The requested quantization warmup data size " + "The requested quantization warmup data size " +
std::to_string(num_images) + " is bigger than all test data size."); std::to_string(num_images) + " is bigger than all test data size.");
...@@ -235,8 +235,8 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData( ...@@ -235,8 +235,8 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
static_cast<int64_t *>(difficult.data.data()) + objects_accum); static_cast<int64_t *>(difficult.data.data()) + objects_accum);
objects_accum = objects_accum + objects_remain; objects_accum = objects_accum + objects_remain;
} }
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
static_cast<size_t>(num_objects) == static_cast<size_t>(objects_accum), static_cast<size_t>(num_objects), static_cast<size_t>(objects_accum),
"The requested num of objects " + std::to_string(num_objects) + "The requested num of objects " + std::to_string(num_objects) +
" is the same as objects_accum."); " is the same as objects_accum.");
...@@ -274,7 +274,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) { ...@@ -274,7 +274,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data); q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size); q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
// 0 is avg_cose, 1 is top1_acc, 2 is top5_acc or mAP // 0 is avg_cost, 1 is top1_acc, 2 is top5_acc or mAP
CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all, 2); CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all, 2);
} }
......
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import xml.etree.ElementTree as ET
import xml.etree.ElementTree
from PIL import Image from PIL import Image
import numpy as np import numpy as np
import os import os
...@@ -21,6 +22,7 @@ import tarfile ...@@ -21,6 +22,7 @@ import tarfile
import StringIO import StringIO
import hashlib import hashlib
import tarfile import tarfile
import argparse
DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar" DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar"
DATA_DIR = os.path.expanduser("~/.cache/paddle/dataset/pascalvoc/") DATA_DIR = os.path.expanduser("~/.cache/paddle/dataset/pascalvoc/")
...@@ -28,8 +30,8 @@ TAR_FILE = "VOCtest_06-Nov-2007.tar" ...@@ -28,8 +30,8 @@ TAR_FILE = "VOCtest_06-Nov-2007.tar"
TAR_PATH = os.path.join(DATA_DIR, TAR_FILE) TAR_PATH = os.path.join(DATA_DIR, TAR_FILE)
RESIZE_H = 300 RESIZE_H = 300
RESIZE_W = 300 RESIZE_W = 300
mean_value = [127.5, 127.5, 127.5] MEAN_VALUE = [127.5, 127.5, 127.5]
ap_version = '11point' AP_VERSION = '11point'
DATA_OUT = 'pascalvoc_full.bin' DATA_OUT = 'pascalvoc_full.bin'
DATA_OUT_PATH = os.path.join(DATA_DIR, DATA_OUT) DATA_OUT_PATH = os.path.join(DATA_DIR, DATA_OUT)
BIN_TARGETHASH = "f6546cadc42f5ff13178b84ed29b740b" BIN_TARGETHASH = "f6546cadc42f5ff13178b84ed29b740b"
...@@ -40,10 +42,8 @@ BIN_FULLSIZE = 5348678856 ...@@ -40,10 +42,8 @@ BIN_FULLSIZE = 5348678856
def preprocess(img): def preprocess(img):
img_width, img_height = img.size img_width, img_height = img.size
img = img.resize((RESIZE_W, RESIZE_H), Image.ANTIALIAS) img = img.resize((RESIZE_W, RESIZE_H), Image.ANTIALIAS)
img = np.array(img) img = np.array(img)
# HWC to CHW # HWC to CHW
if len(img.shape) == 3: if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2) img = np.swapaxes(img, 1, 2)
...@@ -51,12 +51,92 @@ def preprocess(img): ...@@ -51,12 +51,92 @@ def preprocess(img):
# RGB to BGR # RGB to BGR
img = img[[2, 1, 0], :, :] img = img[[2, 1, 0], :, :]
img = img.astype('float32') img = img.astype('float32')
img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype('float32') img_mean = np.array(MEAN_VALUE)[:, np.newaxis, np.newaxis].astype('float32')
img -= img_mean img -= img_mean
img = img * 0.007843 img = img * 0.007843
return img return img
def convert_pascalvoc_local2bin(args):
    """Convert a local PascalVOC-style dataset into one binary file.

    The paths below are all resolved relative to ``args.data_dir``
    (``~`` is expanded).

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``data_dir``, ``label_file``, ``img_annotation_list``,
            ``output_file``, ``resize_h`` and ``resize_w``.

    Output layout, in write order:
        1. number of images                       (one int64)
        2. every preprocessed image               (float32)
        3. number of objects of every image       (uint64 each)
        4. label index of every object            (int64 each)
        5. xmin, ymin, xmax, ymax of every object (float32 each, divided by
           the original image width / height)
        6. ``difficult`` flag of every object     (int64 each)

    Prints a success message when the size of the written file matches the
    size expected from the layout above, and "Conversion failed!" otherwise.
    """
    data_dir = os.path.expanduser(args.data_dir)

    label_fpath = os.path.join(data_dir, args.label_file)
    with open(label_fpath) as flabel:
        label_list = [line.strip() for line in flabel]

    img_annotation_list_path = os.path.join(data_dir, args.img_annotation_list)
    with open(img_annotation_list_path) as flist:
        # Blank lines (e.g. a trailing newline) are not samples: counting them
        # would corrupt the image count and break the two-field split below.
        lines = [line.strip() for line in flist if line.strip()]

    output_file_path = os.path.join(data_dir, args.output_file)
    image_nums = len(lines)

    boxes = []
    lbls = []
    difficults = []
    object_nums = []

    # The context manager closes the output file even if a sample fails.
    with open(output_file_path, "wb") as f1:
        f1.write(np.array(image_nums).astype('int64').tobytes())

        for line in lines:
            # Each list entry is "<image path> <annotation path>".
            image_path, label_path = line.split()
            image_path = os.path.join(data_dir, image_path)
            label_path = os.path.join(data_dir, label_path)

            im = Image.open(image_path)
            if im.mode == 'L':
                # Grayscale input: expand to three channels first.
                im = im.convert('RGB')
            # Keep the original size; boxes are normalized against it.
            im_width, im_height = im.size

            im = preprocess(im)
            np_im = np.array(im)
            f1.write(np_im.astype('float32').tobytes())

            # layout: label | xmin | ymin | xmax | ymax | difficult
            bbox_labels = []
            root = xml.etree.ElementTree.parse(label_path).getroot()

            objects = root.findall('object')
            object_nums.append(len(objects))

            for obj in objects:
                bbox = obj.find('bndbox')
                # The label is the position of the class name in label_list.
                # NOTE(review): the original comment said "start from 1" --
                # presumably label_list[0] is a background entry; confirm.
                bbox_labels.append([
                    float(label_list.index(obj.find('name').text)),
                    float(bbox.find('xmin').text) / im_width,
                    float(bbox.find('ymin').text) / im_height,
                    float(bbox.find('xmax').text) / im_width,
                    float(bbox.find('ymax').text) / im_height,
                    float(obj.find('difficult').text),
                ])

            bbox_labels = np.array(bbox_labels)
            if len(bbox_labels) == 0:
                # No objects: an empty array cannot be sliced by column.
                continue

            lbls.extend(bbox_labels[:, 0])
            boxes.extend(bbox_labels[:, 1:5])
            difficults.extend(bbox_labels[:, -1])

        f1.write(np.array(object_nums).astype('uint64').tobytes())
        f1.write(np.array(lbls).astype('int64').tobytes())
        f1.write(np.array(boxes).astype('float32').tobytes())
        f1.write(np.array(difficults).astype('int64').tobytes())

    # Expected size: 8-byte image count, 3 x H x W float32 values per image,
    # one 8-byte object count per image, and per object an 8-byte label,
    # four 4-byte coordinates and an 8-byte difficult flag.
    # NOTE(review): preprocess() resizes with the module-level RESIZE_W /
    # RESIZE_H, so this check only holds while args.resize_* equal them.
    object_nums_sum = sum(object_nums)
    target_size = (8 + image_nums * 3 * args.resize_h * args.resize_w * 4 +
                   image_nums * 8 + object_nums_sum * (8 + 4 * 4 + 8))
    if os.path.getsize(output_file_path) == target_size:
        print("Success! \nThe output binary file can be found at: ",
              output_file_path)
    else:
        print("Conversion failed!")
def print_processbar(done_percentage): def print_processbar(done_percentage):
done_filled = done_percentage * '=' done_filled = done_percentage * '='
empty_filled = (100 - done_percentage) * ' ' empty_filled = (100 - done_percentage) * ' '
...@@ -65,7 +145,7 @@ def print_processbar(done_percentage): ...@@ -65,7 +145,7 @@ def print_processbar(done_percentage):
sys.stdout.flush() sys.stdout.flush()
def convert_pascalvoc(tar_path, data_out_path): def convert_pascalvoc_tar2bin(tar_path, data_out_path):
print("Start converting ...\n") print("Start converting ...\n")
images = {} images = {}
gt_labels = {} gt_labels = {}
...@@ -87,12 +167,12 @@ def convert_pascalvoc(tar_path, data_out_path): ...@@ -87,12 +167,12 @@ def convert_pascalvoc(tar_path, data_out_path):
f_test = tar.extractfile(TEST_LIST_KEY).read() f_test = tar.extractfile(TEST_LIST_KEY).read()
lines = f_test.split('\n') lines = f_test.split('\n')
del lines[-1] del lines[-1]
line_len = len(lines) image_nums = len(lines)
per_percentage = line_len / 100 per_percentage = image_nums / 100
f1 = open(data_out_path, "w+b") f1 = open(data_out_path, "w+b")
f1.seek(0) f1.seek(0)
f1.write(np.array(line_len).astype('int64').tobytes()) f1.write(np.array(image_nums).astype('int64').tobytes())
for tarInfo in tar: for tarInfo in tar:
if tarInfo.isfile(): if tarInfo.isfile():
tmp_filename = tarInfo.name tmp_filename = tarInfo.name
...@@ -115,7 +195,7 @@ def convert_pascalvoc(tar_path, data_out_path): ...@@ -115,7 +195,7 @@ def convert_pascalvoc(tar_path, data_out_path):
# layout: label | xmin | ymin | xmax | ymax | difficult # layout: label | xmin | ymin | xmax | ymax | difficult
bbox_labels = [] bbox_labels = []
root = ET.fromstring(gt_labels[name_prefix]) root = xml.etree.ElementTree.fromstring(gt_labels[name_prefix])
objects = root.findall('object') objects = root.findall('object')
objects_size = len(objects) objects_size = len(objects)
...@@ -179,9 +259,48 @@ def run_convert(): ...@@ -179,9 +259,48 @@ def run_convert():
retry = retry + 1 retry = retry + 1
else: else:
download_pascalvoc(DATA_URL, DATA_DIR, TAR_TARGETHASH, TAR_PATH) download_pascalvoc(DATA_URL, DATA_DIR, TAR_TARGETHASH, TAR_PATH)
convert_pascalvoc(TAR_PATH, DATA_OUT_PATH) convert_pascalvoc_tar2bin(TAR_PATH, DATA_OUT_PATH)
print("Success! \nThe binary file can be found at %s\n" % DATA_OUT_PATH) print("Success!\nThe binary file can be found at %s\n" % DATA_OUT_PATH)
def main_pascalvoc_preprocess(args):
    """Command-line entry point of the PascalVOC preprocessing script.

    Args:
        args: argv-style list whose first element is the program name (the
            script passes ``sys.argv``). When it is empty or None, argparse
            falls back to the process arguments.

    ``--choice local`` converts a local dataset with
    ``convert_pascalvoc_local2bin``; ``--choice VOC_test_2007`` runs
    ``run_convert``.
    """
    parser = argparse.ArgumentParser(
        description="Convert the full pascalvoc val set or local data to binary file."
    )
    parser.add_argument(
        '--choice', choices=['local', 'VOC_test_2007'], required=True)
    # NOTE(review): this default is an absolute path on a developer machine;
    # on any other host --data_dir has to be passed explicitly.
    parser.add_argument(
        "--data_dir",
        default="/home/li/AIPG-Paddle/paddle/build/third_party/inference_demo/int8v2/pascalvoc_small",
        type=str,
        help="Dataset root directory")
    parser.add_argument(
        "--img_annotation_list",
        type=str,
        default="test_100.txt",
        help="A file containing the image file path and relevant annotation file path"
    )
    parser.add_argument(
        "--label_file",
        type=str,
        default="label_list",
        help="List the labels in the same sequence as denoted in the annotation file"
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default="pascalvoc_small.bin",
        help="File path of the output binary file")
    parser.add_argument("--resize_h", type=int, default=RESIZE_H)
    parser.add_argument("--resize_w", type=int, default=RESIZE_W)
    # NOTE(review): the default is a list of floats while a value given on the
    # command line stays a plain string.
    parser.add_argument("--mean_value", type=str, default=MEAN_VALUE)
    parser.add_argument("--ap_version", type=str, default=AP_VERSION)

    # Parse the argv that was handed in (minus the program name) instead of
    # silently ignoring the parameter; None keeps argparse's sys.argv default.
    cli_args = args[1:] if args else None
    parsed = parser.parse_args(cli_args)

    if parsed.choice == 'local':
        convert_pascalvoc_local2bin(parsed)
    elif parsed.choice == 'VOC_test_2007':
        run_convert()
if __name__ == "__main__": if __name__ == "__main__":
run_convert() main_pascalvoc_preprocess(sys.argv)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from full_pascalvoc_test_preprocess import main_pascalvoc_preprocess
import numpy as np
import paddle.fluid.core as core
import paddle.fluid as fluid
import unittest
import os
class Test_Preprocess(unittest.TestCase):
def test_local_convert(self):
os.system("python full_pascalvoc_test_preprocess.py --choice=local")
def test_online_convert(self):
os.system(
"python full_pascalvoc_test_preprocess.py --choice=VOC_test_2007")
if __name__ == '__main__':
unittest.main()
...@@ -445,9 +445,12 @@ void TestPrediction(const PaddlePredictor::Config *config, ...@@ -445,9 +445,12 @@ void TestPrediction(const PaddlePredictor::Config *config,
void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8, void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
int compared_idx) { int compared_idx) {
PADDLE_ENFORCE(compared_idx <= 2 && compared_idx >= 1, PADDLE_ENFORCE_LE(compared_idx, 2,
"Compare either top1 accuracy either mAP(top5), the " "Compare either top1 accuracy or mAP (top5), the "
"compared_idx is out of range"); "compared_idx is out of range");
PADDLE_ENFORCE_GE(compared_idx, 1,
"Compare either top1 accuracy or mAP (top5), the "
"compared_idx is out of range");
std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP "; std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP ";
LOG(INFO) << "--- Accuracy summary --- "; LOG(INFO) << "--- Accuracy summary --- ";
LOG(INFO) << "Accepted " << prefix LOG(INFO) << "Accepted " << prefix
...@@ -485,8 +488,23 @@ void CompareAccuracy( ...@@ -485,8 +488,23 @@ void CompareAccuracy(
float total_accs_quant{0}; float total_accs_quant{0};
float total_accs_ref{0}; float total_accs_ref{0};
for (size_t i = 0; i < output_slots_quant.size(); ++i) { for (size_t i = 0; i < output_slots_quant.size(); ++i) {
PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL); if (compared_idx == 1) {
PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL); PADDLE_ENFORCE_GE(
output_slots_quant[i].size(), 2UL,
"To achieve top 1 accuracy, output_slots_quant[i].size()>=2");
PADDLE_ENFORCE_GE(
output_slots_ref[i].size(), 2UL,
"To achieve top 1 accuracy, output_slots_ref[i].size()>=2");
} else if (compared_idx == 2) {
PADDLE_ENFORCE_GE(output_slots_quant[i].size(), 3UL,
"To achieve mAP, output_slots_quant[i].size()>=3");
PADDLE_ENFORCE_GE(output_slots_ref[i].size(), 3UL,
"To achieve mAP, output_slots_ref[i].size()>=3");
} else {
throw std::invalid_argument(
"CompareAccuracy: compared_idx is out of range.");
}
if (output_slots_quant[i][compared_idx].lod.size() > 0 || if (output_slots_quant[i][compared_idx].lod.size() > 0 ||
output_slots_ref[i][compared_idx].lod.size() > 0) output_slots_ref[i][compared_idx].lod.size() > 0)
throw std::invalid_argument("CompareAccuracy: output has nonempty LoD."); throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
...@@ -535,8 +553,8 @@ void CompareNativeAndAnalysis( ...@@ -535,8 +553,8 @@ void CompareNativeAndAnalysis(
std::vector<std::vector<PaddleTensor>> native_outputs, analysis_outputs; std::vector<std::vector<PaddleTensor>> native_outputs, analysis_outputs;
TestOneThreadPrediction(config, inputs, &native_outputs, false); TestOneThreadPrediction(config, inputs, &native_outputs, false);
TestOneThreadPrediction(config, inputs, &analysis_outputs, true); TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
PADDLE_ENFORCE(native_outputs.size() > 0, "Native output is empty."); PADDLE_ENFORCE_GT(native_outputs.size(), 0, "Native output is empty.");
PADDLE_ENFORCE(analysis_outputs.size() > 0, "Analysis output is empty."); PADDLE_ENFORCE_GT(analysis_outputs.size(), 0, "Analysis output is empty.");
CompareResult(analysis_outputs.back(), native_outputs.back()); CompareResult(analysis_outputs.back(), native_outputs.back());
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册