From 9240e5325c2e18d7ee11799209d0edc4d3f19788 Mon Sep 17 00:00:00 2001
From: lidanqing
Date: Thu, 22 Aug 2019 08:49:58 +0200
Subject: [PATCH] add local user data conversion into
 full_pascalvoc_test_preprocess.py (#19283)

* add local user data conversion into full_pascalvoc_test_preprocess.py
  test=develop

* change PADDLE_ENFORCE to PADDLE_ENFORCE_GE
  test=develop

* change according to reviews
  test=develop
---
 paddle/fluid/inference/api/helper.h           |   2 +-
 .../fluid/inference/tests/api/CMakeLists.txt  |   3 +
 .../analyzer_int8_object_detection_tester.cc  |  10 +-
 .../api/full_pascalvoc_test_preprocess.py     | 147 ++++++++++++++++--
 .../api/test_detection_dataset_preprocess.py  |  35 +++++
 .../fluid/inference/tests/api/tester_helper.h |  32 +++-
 6 files changed, 202 insertions(+), 27 deletions(-)
 create mode 100644 paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py

diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index e5820c3637b..907d35b298c 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -317,7 +317,7 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
                       double batch_latency, int epoch = 1,
                       const framework::proto::VarType::Type data_type =
                           framework::proto::VarType::FP32) {
-  PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
+  PADDLE_ENFORCE_GT(batch_size, 0, "Non-positive batch size.");
   double sample_latency = batch_latency / batch_size;
   LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
             << " ======";
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 2310c9fbd1f..9de67e9ca91 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -250,6 +250,9 @@ if(WITH_MKLDNN)
         # download dataset if necessary
         download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz")
 
+        # download small demo set of pascalvoc for testing local userdata preprocessing
+        download_int8_data(${INT8_DATA_DIR} "pascalvoc_small.tar.gz")
+
         # build test binary to be used in subsequent tests
         inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC})
 
diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
index 334fdb6ce9d..72da7c48b25 100644
--- a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
@@ -144,8 +144,8 @@ std::shared_ptr> GetWarmupData(
     int32_t num_images = FLAGS_warmup_batch_size) {
   int test_data_batch_size = test_data[0][0].shape[0];
   auto iterations = test_data.size();
-  PADDLE_ENFORCE(
-      static_cast(num_images) <= iterations * test_data_batch_size,
+  PADDLE_ENFORCE_LE(
+      static_cast(num_images), iterations * test_data_batch_size,
       "The requested quantization warmup data size " +
           std::to_string(num_images) + " is bigger than all test data size.");
 
@@ -235,8 +235,8 @@ std::shared_ptr> GetWarmupData(
         static_cast(difficult.data.data()) + objects_accum);
     objects_accum = objects_accum + objects_remain;
   }
-  PADDLE_ENFORCE(
-      static_cast(num_objects) == static_cast(objects_accum),
+  PADDLE_ENFORCE_EQ(
+      static_cast(num_objects), static_cast(objects_accum),
       "The requested num of objects " + std::to_string(num_objects) +
           " is the same as objects_accum.");
 
@@ -274,7 +274,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
   q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
   q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
 
-  // 0 is avg_cose, 1 is top1_acc, 2 is top5_acc or mAP
+  // 0 is avg_cost, 1 is top1_acc, 2 is top5_acc or mAP
   CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all, 2);
 }
 
diff --git a/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py b/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
index 2ca8e582f8c..d703a129706 100644
--- a/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
+++ b/paddle/fluid/inference/tests/api/full_pascalvoc_test_preprocess.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import xml.etree.ElementTree as ET
+
+import xml.etree.ElementTree
 from PIL import Image
 import numpy as np
 import os
@@ -21,6 +22,7 @@ import tarfile
 import StringIO
 import hashlib
 import tarfile
+import argparse
 
 DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar"
 DATA_DIR = os.path.expanduser("~/.cache/paddle/dataset/pascalvoc/")
@@ -28,8 +30,8 @@ TAR_FILE = "VOCtest_06-Nov-2007.tar"
 TAR_PATH = os.path.join(DATA_DIR, TAR_FILE)
 RESIZE_H = 300
 RESIZE_W = 300
-mean_value = [127.5, 127.5, 127.5]
-ap_version = '11point'
+MEAN_VALUE = [127.5, 127.5, 127.5]
+AP_VERSION = '11point'
 DATA_OUT = 'pascalvoc_full.bin'
 DATA_OUT_PATH = os.path.join(DATA_DIR, DATA_OUT)
 BIN_TARGETHASH = "f6546cadc42f5ff13178b84ed29b740b"
@@ -40,10 +42,8 @@ BIN_FULLSIZE = 5348678856
 
 def preprocess(img):
     img_width, img_height = img.size
-
     img = img.resize((RESIZE_W, RESIZE_H), Image.ANTIALIAS)
     img = np.array(img)
-
     # HWC to CHW
     if len(img.shape) == 3:
         img = np.swapaxes(img, 1, 2)
@@ -51,12 +51,92 @@ def preprocess(img):
     # RBG to BGR
     img = img[[2, 1, 0], :, :]
     img = img.astype('float32')
-    img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype('float32')
+    img_mean = np.array(MEAN_VALUE)[:, np.newaxis, np.newaxis].astype('float32')
     img -= img_mean
     img = img * 0.007843
     return img
 
 
+def convert_pascalvoc_local2bin(args):
+    data_dir = os.path.expanduser(args.data_dir)
+    label_fpath = os.path.join(data_dir, args.label_file)
+    flabel = open(label_fpath)
+    label_list = [line.strip() for line in flabel]
+
+    img_annotation_list_path = os.path.join(data_dir, args.img_annotation_list)
+    flist = open(img_annotation_list_path)
+    lines = [line.strip() for line in flist]
+
+    output_file_path = os.path.join(data_dir, args.output_file)
+    f1 = open(output_file_path, "w+b")
+    f1.seek(0)
+    image_nums = len(lines)
+    f1.write(np.array(image_nums).astype('int64').tobytes())
+
+    boxes = []
+    lbls = []
+    difficults = []
+    object_nums = []
+
+    for line in lines:
+        image_path, label_path = line.split()
+        image_path = os.path.join(data_dir, image_path)
+        label_path = os.path.join(data_dir, label_path)
+
+        im = Image.open(image_path)
+        if im.mode == 'L':
+            im = im.convert('RGB')
+        im_width, im_height = im.size
+
+        im = preprocess(im)
+        np_im = np.array(im)
+        f1.write(np_im.astype('float32').tobytes())
+
+        # layout: label | xmin | ymin | xmax | ymax | difficult
+        bbox_labels = []
+        root = xml.etree.ElementTree.parse(label_path).getroot()
+
+        objects = root.findall('object')
+        objects_size = len(objects)
+        object_nums.append(objects_size)
+
+        for object in objects:
+            bbox_sample = []
+            # start from 1
+            bbox_sample.append(
+                float(label_list.index(object.find('name').text)))
+            bbox = object.find('bndbox')
+            difficult = float(object.find('difficult').text)
+            bbox_sample.append(float(bbox.find('xmin').text) / im_width)
+            bbox_sample.append(float(bbox.find('ymin').text) / im_height)
+            bbox_sample.append(float(bbox.find('xmax').text) / im_width)
+            bbox_sample.append(float(bbox.find('ymax').text) / im_height)
+            bbox_sample.append(difficult)
+            bbox_labels.append(bbox_sample)
+
+        bbox_labels = np.array(bbox_labels)
+        if len(bbox_labels) == 0: continue
+
+        lbls.extend(bbox_labels[:, 0])
+        boxes.extend(bbox_labels[:, 1:5])
+        difficults.extend(bbox_labels[:, -1])
+
+    f1.write(np.array(object_nums).astype('uint64').tobytes())
+    f1.write(np.array(lbls).astype('int64').tobytes())
+    f1.write(np.array(boxes).astype('float32').tobytes())
+    f1.write(np.array(difficults).astype('int64').tobytes())
+    f1.close()
+
+    object_nums_sum = sum(object_nums)
+    target_size = 8 + image_nums * 3 * args.resize_h * args.resize_h * 4 + image_nums * 8 + object_nums_sum * (
+        8 + 4 * 4 + 8)
+    if (os.path.getsize(output_file_path) == target_size):
+        print("Success! \nThe output binary file can be found at: ",
+              output_file_path)
+    else:
+        print("Conversion failed!")
+
+
 def print_processbar(done_percentage):
     done_filled = done_percentage * '='
     empty_filled = (100 - done_percentage) * ' '
@@ -65,7 +145,7 @@ def print_processbar(done_percentage):
     sys.stdout.flush()
 
 
-def convert_pascalvoc(tar_path, data_out_path):
+def convert_pascalvoc_tar2bin(tar_path, data_out_path):
     print("Start converting ...\n")
     images = {}
     gt_labels = {}
@@ -87,12 +167,12 @@ def convert_pascalvoc(tar_path, data_out_path):
     f_test = tar.extractfile(TEST_LIST_KEY).read()
     lines = f_test.split('\n')
     del lines[-1]
-    line_len = len(lines)
-    per_percentage = line_len / 100
+    image_nums = len(lines)
+    per_percentage = image_nums / 100
 
     f1 = open(data_out_path, "w+b")
     f1.seek(0)
-    f1.write(np.array(line_len).astype('int64').tobytes())
+    f1.write(np.array(image_nums).astype('int64').tobytes())
     for tarInfo in tar:
         if tarInfo.isfile():
             tmp_filename = tarInfo.name
@@ -115,7 +195,7 @@ def convert_pascalvoc(tar_path, data_out_path):
 
             # layout: label | xmin | ymin | xmax | ymax | difficult
             bbox_labels = []
-            root = ET.fromstring(gt_labels[name_prefix])
+            root = xml.etree.ElementTree.fromstring(gt_labels[name_prefix])
             objects = root.findall('object')
             objects_size = len(objects)
 
@@ -179,9 +259,48 @@ def run_convert():
             retry = retry + 1
         else:
             download_pascalvoc(DATA_URL, DATA_DIR, TAR_TARGETHASH, TAR_PATH)
-    convert_pascalvoc(TAR_PATH, DATA_OUT_PATH)
-    print("Success! \nThe binary file can be found at %s\n" % DATA_OUT_PATH)
+    convert_pascalvoc_tar2bin(TAR_PATH, DATA_OUT_PATH)
+    print("Success!\nThe binary file can be found at %s\n" % DATA_OUT_PATH)
+
+
+def main_pascalvoc_preprocess(args):
+    parser = argparse.ArgumentParser(
+        description="Convert the full pascalvoc val set or local data to binary file."
+    )
+    parser.add_argument(
+        '--choice', choices=['local', 'VOC_test_2007'], required=True)
+    parser.add_argument(
+        "--data_dir",
+        default="/home/li/AIPG-Paddle/paddle/build/third_party/inference_demo/int8v2/pascalvoc_small",
+        type=str,
+        help="Dataset root directory")
+    parser.add_argument(
+        "--img_annotation_list",
+        type=str,
+        default="test_100.txt",
+        help="A file containing the image file path and relevant annotation file path"
+    )
+    parser.add_argument(
+        "--label_file",
+        type=str,
+        default="label_list",
+        help="List the labels in the same sequence as denoted in the annotation file"
+    )
+    parser.add_argument(
+        "--output_file",
+        type=str,
+        default="pascalvoc_small.bin",
+        help="File path of the output binary file")
+    parser.add_argument("--resize_h", type=int, default=RESIZE_H)
+    parser.add_argument("--resize_w", type=int, default=RESIZE_W)
+    parser.add_argument("--mean_value", type=str, default=MEAN_VALUE)
+    parser.add_argument("--ap_version", type=str, default=AP_VERSION)
+    args = parser.parse_args()
+    if args.choice == 'local':
+        convert_pascalvoc_local2bin(args)
+    elif args.choice == 'VOC_test_2007':
+        run_convert()
 
 
 if __name__ == "__main__":
-    run_convert()
+    main_pascalvoc_preprocess(sys.argv)
diff --git a/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py b/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py
new file mode 100644
index 00000000000..4576d60a3d2
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/test_detection_dataset_preprocess.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+from full_pascalvoc_test_preprocess import main_pascalvoc_preprocess
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import unittest
+import os
+
+
+class Test_Preprocess(unittest.TestCase):
+    def test_local_convert(self):
+        os.system("python full_pascalvoc_test_preprocess.py --choice=local")
+
+    def test_online_convert(self):
+        os.system(
+            "python full_pascalvoc_test_preprocess.py --choice=VOC_test_2007")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 2d4e75b827b..f502e05dce4 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -445,9 +445,12 @@ void TestPrediction(const PaddlePredictor::Config *config,
 
 void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
                        int compared_idx) {
-  PADDLE_ENFORCE(compared_idx <= 2 && compared_idx >= 1,
-                 "Compare either top1 accuracy either mAP(top5), the "
-                 "compared_idx is out of range");
+  PADDLE_ENFORCE_LE(compared_idx, 2,
+                    "Compare either top1 accuracy or mAP (top5), the "
+                    "compared_idx is out of range");
+  PADDLE_ENFORCE_GE(compared_idx, 1,
+                    "Compare either top1 accuracy or mAP (top5), the "
+                    "compared_idx is out of range");
   std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP ";
   LOG(INFO) << "--- Accuracy summary --- ";
   LOG(INFO) << "Accepted " << prefix
@@ -485,8 +488,23 @@ void CompareAccuracy(
   float total_accs_quant{0};
   float total_accs_ref{0};
   for (size_t i = 0; i < output_slots_quant.size(); ++i) {
-    PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL);
-    PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL);
+    if (compared_idx == 1) {
+      PADDLE_ENFORCE_GE(
+          output_slots_quant[i].size(), 2UL,
+          "To achieve top 1 accuracy, output_slots_quant[i].size()>=2");
+      PADDLE_ENFORCE_GE(
+          output_slots_ref[i].size(), 2UL,
+          "To achieve top 1 accuracy, output_slots_ref[i].size()>=2");
+    } else if (compared_idx == 2) {
+      PADDLE_ENFORCE_GE(output_slots_quant[i].size(), 3UL,
+                        "To achieve mAP, output_slots_quant[i].size()>=3");
+      PADDLE_ENFORCE_GE(output_slots_ref[i].size(), 3UL,
+                        "To achieve mAP, output_slots_ref[i].size()>=3");
+    } else {
+      throw std::invalid_argument(
+          "CompareAccuracy: compared_idx is out of range.");
+    }
+
     if (output_slots_quant[i][compared_idx].lod.size() > 0 ||
         output_slots_ref[i][compared_idx].lod.size() > 0)
       throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
@@ -535,8 +553,8 @@ void CompareNativeAndAnalysis(
   std::vector> native_outputs, analysis_outputs;
   TestOneThreadPrediction(config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
-  PADDLE_ENFORCE(native_outputs.size() > 0, "Native output is empty.");
-  PADDLE_ENFORCE(analysis_outputs.size() > 0, "Analysis output is empty.");
+  PADDLE_ENFORCE_GT(native_outputs.size(), 0, "Native output is empty.");
+  PADDLE_ENFORCE_GT(analysis_outputs.size(), 0, "Analysis output is empty.");
   CompareResult(analysis_outputs.back(), native_outputs.back());
 }
-- 
GitLab
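Note (not part of the patch): a minimal usage sketch of the new preprocessing entry point, mirroring the os.system calls in test_detection_dataset_preprocess.py above. The flag names come from main_pascalvoc_preprocess(); the data_dir value and working directory are illustrative assumptions, not defined by the patch.

    # Local user data: expects <data_dir>/<img_annotation_list> to hold lines of
    # "<relative image path> <relative annotation .xml path>" plus a label_list
    # file, as read by convert_pascalvoc_local2bin(). Paths are assumptions.
    import os

    os.system("python full_pascalvoc_test_preprocess.py --choice=local"
              " --data_dir=pascalvoc_small"
              " --img_annotation_list=test_100.txt"
              " --label_file=label_list"
              " --output_file=pascalvoc_small.bin")

    # Full VOC2007 test set: downloads VOCtest_06-Nov-2007.tar if needed and
    # writes pascalvoc_full.bin under ~/.cache/paddle/dataset/pascalvoc/.
    os.system("python full_pascalvoc_test_preprocess.py --choice=VOC_test_2007")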