diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 8c451153230d77071b7f4b816f18151c04592579..0d25c159fd21d7515de73a307c546d9278bd2948 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -182,11 +182,10 @@ void AnalysisConfig::EnableNgraph() {
 #endif
 }
 
-std::shared_ptr<MkldnnQuantizerConfig> AnalysisConfig::mkldnn_quantizer_config()
-    const {
+MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
   PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
                           "MkldnnQuantizer was not enabled yet.");
-  return mkldnn_quantizer_config_;
+  return mkldnn_quantizer_config_.get();
 }
 
 void AnalysisConfig::EnableTensorRtEngine(
diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc
index 6f57f8b20287d8427321aa17bf8c6d094d776802..44b1b8071de9d0e825ea4c8ee895c44b8951f14f 100644
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -260,7 +260,7 @@ class MkldnnQuantizerTest : public testing::Test {
     predictor.reset(new AnalysisPredictor(config));
     auto* predictor_p = static_cast<AnalysisPredictor*>(predictor.get());
 
-    auto qconfig = std::make_shared<MkldnnQuantizerConfig>();
+    auto qconfig = new MkldnnQuantizerConfig();
 
     mkldnn_quantizer.reset(
         new AnalysisPredictor::MkldnnQuantizer(*predictor_p, qconfig));
diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.h b/paddle/fluid/inference/api/mkldnn_quantizer.h
index f4b0df5d742ed12f856fc7982d955e89288a1888..aea4a0ac93d253fe6b81fb726b8b19369dabd169 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer.h
+++ b/paddle/fluid/inference/api/mkldnn_quantizer.h
@@ -45,9 +45,8 @@ using VarQuantScale =
 
 class AnalysisPredictor::MkldnnQuantizer {
  public:
-  explicit MkldnnQuantizer(
-      AnalysisPredictor& predictor,  // NOLINT
-      const std::shared_ptr<MkldnnQuantizerConfig>& qconfig)
+  explicit MkldnnQuantizer(AnalysisPredictor& predictor,  // NOLINT
+                           const MkldnnQuantizerConfig* qconfig)
       : predictor_(predictor), qconfig_(qconfig) {}
 
   // Execute full quantization procedure.
@@ -95,7 +94,7 @@ class AnalysisPredictor::MkldnnQuantizer {
 
  private:
   AnalysisPredictor& predictor_;
-  const std::shared_ptr<MkldnnQuantizerConfig> qconfig_;
+  const MkldnnQuantizerConfig* qconfig_;
 
   // A map: variable name -> scale
   VarQuantScale scales_;
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index ba4429a7562fde230f07b99751ae359386a9ca56..951cb669cca003f31718337018487d5b1e619618 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -210,7 +210,7 @@ struct AnalysisConfig {
    */
   bool mkldnn_quantizer_enabled() const { return use_mkldnn_quantizer_; }
 
-  std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config() const;
+  MkldnnQuantizerConfig* mkldnn_quantizer_config() const;
 
   /** Specify the memory buffer of program and parameter
    * @param prog_buffer the memory buffer of program.
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 8ec9806f5fbfd36303585fece033bbcbe484adce..27f0e30d021534fd147e928b9eaf3c4ff040468e 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -17,7 +17,9 @@
 #include
 #include
 #include
+#include <memory>
 #include
+#include <unordered_set>
 #include
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
@@ -45,6 +47,10 @@ static void BindNativePredictor(py::module *m);
 static void BindAnalysisConfig(py::module *m);
 static void BindAnalysisPredictor(py::module *m);
 
+#ifdef PADDLE_WITH_MKLDNN
+static void BindMkldnnQuantizerConfig(py::module *m);
+#endif
+
 void BindInferenceApi(py::module *m) {
   BindPaddleDType(m);
   BindPaddleBuf(m);
@@ -55,7 +61,9 @@ void BindInferenceApi(py::module *m) {
   BindNativePredictor(m);
   BindAnalysisConfig(m);
   BindAnalysisPredictor(m);
-
+#ifdef PADDLE_WITH_MKLDNN
+  BindMkldnnQuantizerConfig(m);
+#endif
   m->def("create_paddle_predictor",
          &paddle::CreatePaddlePredictor<AnalysisConfig>);
   m->def("create_paddle_predictor",
@@ -249,6 +257,11 @@ void BindAnalysisConfig(py::module *m) {
       .def("cpu_math_library_num_threads",
           &AnalysisConfig::cpu_math_library_num_threads)
       .def("to_native_config", &AnalysisConfig::ToNativeConfig)
+      .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
+#ifdef PADDLE_WITH_MKLDNN
+      .def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
+           py::return_value_policy::reference)
+#endif
       .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp)
       .def("set_model_buffer", &AnalysisConfig::SetModelBuffer)
       .def("model_from_memory", &AnalysisConfig::model_from_memory)
@@ -256,6 +269,28 @@
            py::return_value_policy::reference);
 }
 
+#ifdef PADDLE_WITH_MKLDNN
+void BindMkldnnQuantizerConfig(py::module *m) {
+  py::class_<MkldnnQuantizerConfig> quantizer_config(*m,
+                                                     "MkldnnQuantizerConfig");
+  quantizer_config.def(py::init())
+      .def(py::init<>())
+      .def("set_quant_data",
+           [](MkldnnQuantizerConfig &self,
+              const std::vector<PaddleTensor> &data) {
+             auto warmup_data =
+                 std::make_shared<std::vector<PaddleTensor>>(data);
+             self.SetWarmupData(warmup_data);
+             return;
+           })
+      .def("set_quant_batch_size", &MkldnnQuantizerConfig::SetWarmupBatchSize)
+      .def(
+          "set_enabled_op_types",
+          (void (MkldnnQuantizerConfig::*)(std::unordered_set<std::string> &)) &
+              MkldnnQuantizerConfig::SetEnabledOpTypes);
+}
+#endif
+
 void BindAnalysisPredictor(py::module *m) {
   py::class_<AnalysisPredictor>(*m, "AnalysisPredictor")
       .def(py::init())
@@ -272,7 +307,9 @@
       .def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
       .def("clone", &AnalysisPredictor::Clone)
       .def("scope", &AnalysisPredictor::scope,
-           py::return_value_policy::reference);
+           py::return_value_policy::reference)
+      .def("SaveOptimModel", &AnalysisPredictor::SaveOptimModel,
+           py::arg("dir"));
 }
 
 }  // namespace pybind
diff --git a/python/paddle/fluid/contrib/slim/core/compressor.py b/python/paddle/fluid/contrib/slim/core/compressor.py
index b97508018ac6da47bfdefadd06a6c3788cb7bd77..2344d95eaee98c6a842eec2f335c59bd1d4e5539 100644
--- a/python/paddle/fluid/contrib/slim/core/compressor.py
+++ b/python/paddle/fluid/contrib/slim/core/compressor.py
@@ -467,6 +467,10 @@ class Compressor(object):
         for strategy in self.strategies:
             strategy.on_compression_begin(context)
+        if 'MKLDNNPostTrainingQuantStrategy' in [
+                i.__class__.__name__ for i in self.strategies
+        ]:
+            return None
 
         start = context.epoch_id
         self._eval(context)
         for epoch in range(start, self.epoch):
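Editor's note (not part of the patch): the bindings above expose the MKL-DNN quantizer to Python roughly as sketched below. The sketch mirrors the calls made by MKLDNNPostTrainingQuantStrategy later in this patch; the model paths and the warmup tensors are placeholders.

```python
from paddle.fluid import core

fp32_model_dir = '/path/to/fp32_model'  # placeholder
int8_model_dir = '/path/to/int8_model'  # placeholder
warmup_tensors = []  # list of core.PaddleTensor holding one warmup batch
warmup_batch_size = 100

config = core.AnalysisConfig("AnalysisConfig")
config.set_model(fp32_model_dir)
config.disable_gpu()
config.enable_mkldnn()

# New bindings: turn on the quantizer and hand it the warmup data.
config.enable_quantizer()
config.quantizer_config().set_quant_data(warmup_tensors)
config.quantizer_config().set_quant_batch_size(warmup_batch_size)

# Creating the predictor runs the quantization passes; the optimized
# program can then be written out via the new SaveOptimModel binding.
predictor = core.create_paddle_predictor(config)
predictor.SaveOptimModel(int8_model_dir)
```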
diff --git a/python/paddle/fluid/contrib/slim/quantization/__init__.py b/python/paddle/fluid/contrib/slim/quantization/__init__.py
index 1c51aa15373779b06273296a27d913c070079f41..445cbc776a3236ab3f91e39eb833def890c17ca2 100644
--- a/python/paddle/fluid/contrib/slim/quantization/__init__.py
+++ b/python/paddle/fluid/contrib/slim/quantization/__init__.py
@@ -18,5 +18,8 @@
 from . import quantization_pass
 from .quantization_pass import *
 from . import quantization_strategy
 from .quantization_strategy import *
+from . import mkldnn_post_training_strategy
+from .mkldnn_post_training_strategy import *
 __all__ = quantization_pass.__all__ + quantization_strategy.__all__
+__all__ += mkldnn_post_training_strategy.__all__
diff --git a/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py b/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d3a1cf3e82a10007dbbfd848c90fbe1c88a1f4b
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import logging
+import six
+import numpy as np
+from .... import core
+from ..core.strategy import Strategy
+
+__all__ = ['MKLDNNPostTrainingQuantStrategy']
+
+logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.INFO)
+
+
+class MKLDNNPostTrainingQuantStrategy(Strategy):
+    """
+    The MKL-DNN post-training quantization strategy.
+    """
+
+    def __init__(self,
+                 int8_model_save_path=None,
+                 fp32_model_path=None,
+                 cpu_math_library_num_threads=1):
+        """
+        Args:
+            int8_model_save_path(str): directory used to save the int8 ProgramDesc with
+                        fp32 weights that is produced for MKL-DNN int8 inference. For post-training
+                        quantization, MKLDNNPostTrainingQuantStrategy currently only supports converting
+                        an fp32 ProgramDesc with fp32 weights into an int8 ProgramDesc with fp32 weights.
+                        The saved int8 ProgramDesc can only be executed with MKL-DNN enabled.
+                        None means the int8 ProgramDesc is not saved. default: None.
+            fp32_model_path(str): directory used to load the original fp32 ProgramDesc with fp32
+                        weights. None means there is no fp32 ProgramDesc to load. default: None.
+            cpu_math_library_num_threads(int): the number of cpu math library threads used by
+                        MKLDNNPostTrainingQuantStrategy. 1 means only one cpu math library
+                        thread is used. default: 1
+        """
+
+        super(MKLDNNPostTrainingQuantStrategy, self).__init__(0, 0)
+        self.int8_model_save_path = int8_model_save_path
+        if fp32_model_path is None:
+            raise Exception("The fp32_model_path cannot be None.")
+        self.fp32_model_path = fp32_model_path
+        self.cpu_math_library_num_threads = cpu_math_library_num_threads
+
+    def on_compression_begin(self, context):
+        """
+        Prepare the warmup data and quantize the model.
+        """
+
+        super(MKLDNNPostTrainingQuantStrategy,
+              self).on_compression_begin(context)
+        _logger.info('MKLDNNPostTrainingQuantStrategy::on_compression_begin')
+
+        # Prepare the Analysis Config
+        infer_config = core.AnalysisConfig("AnalysisConfig")
+        infer_config.switch_ir_optim(True)
+        infer_config.disable_gpu()
+        infer_config.set_model(self.fp32_model_path)
+        infer_config.enable_mkldnn()
+        infer_config.set_cpu_math_library_num_threads(
+            self.cpu_math_library_num_threads)
+
+        # Prepare the data for calculating the quantization scales
+        warmup_reader = context.eval_reader()
+        if six.PY2:
+            data = warmup_reader.next()
+
+        if six.PY3:
+            data = warmup_reader.__next__()
+
+        # TODO (Intel) Remove the limitation that MKLDNNPostTrainingQuantStrategy
+        # only supports image classification
+        num_images = len(data)
+        images = core.PaddleTensor()
+        images.name = "x"
+        images.shape = [num_images, ] + list(data[0][0].shape)
+        images.dtype = core.PaddleDType.FLOAT32
+        image_data = [img.tolist() for (img, _) in data]
+        image_data = np.array(image_data).astype("float32")
+        image_data = image_data.ravel()
+        images.data = core.PaddleBuf(image_data.tolist())
+
+        labels = core.PaddleTensor()
+        labels.name = "y"
+        labels.shape = [num_images, 1]
+        labels.dtype = core.PaddleDType.INT64
+        label_data = [label for (_, label) in data]
+        labels.data = core.PaddleBuf(label_data)
+
+        warmup_data = [images, labels]
+
+        # Enable the INT8 Quantization
+        infer_config.enable_quantizer()
+        infer_config.quantizer_config().set_quant_data(warmup_data)
+        infer_config.quantizer_config().set_quant_batch_size(num_images)
+
+        # Run INT8 MKL-DNN Quantization
+        predictor = core.create_paddle_predictor(infer_config)
+        if self.int8_model_save_path:
+            if not os.path.exists(self.int8_model_save_path):
+                os.makedirs(self.int8_model_save_path)
+            predictor.SaveOptimModel(self.int8_model_save_path)
+
+        _logger.info(
+            'Finish MKLDNNPostTrainingQuantStrategy::on_compression_begin')
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 848f063f67716f6d348ba21d697ad7373783ee22..53a89a3facf0ca51e0aee29de82f5e929e5ed1b6 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -1,11 +1,63 @@
 file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 
+function(inference_analysis_python_api_int8_test target model_dir data_dir filename)
+    py_test(${target} SRCS ${filename}
+            ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
+            ARGS --infer_model ${model_dir}/model
+                 --infer_data ${data_dir}/data.bin
+                 --int8_model_save_path int8_models/${target}
+                 --warmup_batch_size 100
+                 --batch_size 50)
+endfunction()
+
 # NOTE: TODOOOOOOOOOOO
 # temporarily disable test_distillation_strategy since it always failed on a specified machine with 4 GPUs
 # Need to figure out the root cause and then add it back
 list(REMOVE_ITEM TEST_OPS test_distillation_strategy)
 
+# int8 image classification python api test
+if(LINUX AND WITH_MKLDNN)
+    set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
+    set(MKLDNN_INT8_TEST_FILE "test_mkldnn_int8_quantization_strategy.py")
+
+    # googlenet int8
+    set(INT8_GOOGLENET_MODEL_DIR "${INT8_DATA_DIR}/googlenet")
+    inference_analysis_python_api_int8_test(test_slim_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+    # mobilenet int8
+    set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet")
+    inference_analysis_python_api_int8_test(test_slim_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+    # temporarily adding WITH_SLIM_MKLDNN_FULL_TEST FLAG for QA testing the following UTs locally,
+    # since the following UTs cost too much time on CI test.
+    if (WITH_SLIM_MKLDNN_FULL_TEST)
+        # resnet50 int8
+        set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
+        inference_analysis_python_api_int8_test(test_slim_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+        # mobilenetv2 int8
+        set(INT8_MOBILENETV2_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv2")
+        inference_analysis_python_api_int8_test(test_slim_int8_mobilenetv2 ${INT8_MOBILENETV2_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+        # resnet101 int8
+        set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101")
+        inference_analysis_python_api_int8_test(test_slim_int8_resnet101 ${INT8_RESNET101_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+        # vgg16 int8
+        set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
+        inference_analysis_python_api_int8_test(test_slim_int8_vgg16 ${INT8_VGG16_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+
+        # vgg19 int8
+        set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19")
+        inference_analysis_python_api_int8_test(test_slim_int8_vgg19 ${INT8_VGG19_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE})
+    endif()
+endif()
+
+# Since test_mkldnn_int8_quantization_strategy only supports testing on Linux
+# with MKL-DNN, it is removed from TEST_OPS here to avoid running it twice or on unsupported systems.
+list(REMOVE_ITEM TEST_OPS test_mkldnn_int8_quantization_strategy)
+
 foreach(src ${TEST_OPS})
   py_test(${src} SRCS ${src}.py)
 endforeach()
diff --git a/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml b/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1e0df9c58a2081275ad090857668c90e2efc8d55
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml
@@ -0,0 +1,28 @@
+#int8_model_save_path(str): directory used to save the int8 ProgramDesc with fp32 weights
+#            produced for MKL-DNN int8 inference. For post-training quantization,
+#            MKLDNNPostTrainingQuantStrategy currently only supports converting an fp32 ProgramDesc
+#            with fp32 weights into an int8 ProgramDesc with fp32 weights. The saved int8 ProgramDesc
+#            can only be executed with MKL-DNN enabled. None means it is not saved. default: None.
+#
+#fp32_model_path(str): directory used to load the original fp32 ProgramDesc with fp32 weights.
+#            None means there is no fp32 ProgramDesc to load. default: None.
+#
+#cpu_math_library_num_threads(int): the number of cpu math library threads used by
+#            MKLDNNPostTrainingQuantStrategy. 1 means only one cpu math library
+#            thread is used. default: 1
+# Note: Here we set cpu_math_library_num_threads to 4, which is the maximum number of
+#       cpu math library threads on the CI machine.
+#
+#
+version: 1.0
+strategies:
+    mkldnn_post_training_strategy:
+        class: 'MKLDNNPostTrainingQuantStrategy'
+        int8_model_save_path: 'OUTPUT_PATH'
+        fp32_model_path: 'MODEL_PATH'
+        cpu_math_library_num_threads: 4
+compressor:
+    epoch: 0
+    checkpoint_path: ''
+    strategies:
+        - mkldnn_post_training_strategy
diff --git a/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py b/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f10b3281284403214dc1c43440008faa67fb667
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
@@ -0,0 +1,216 @@
+# copyright (c) 2019 paddlepaddle authors. all rights reserved.
+#
+# licensed under the apache license, version 2.0 (the "license");
+# you may not use this file except in compliance with the license.
+# you may obtain a copy of the license at
+#
+#     http://www.apache.org/licenses/license-2.0
+#
+# unless required by applicable law or agreed to in writing, software
+# distributed under the license is distributed on an "as is" basis,
+# without warranties or conditions of any kind, either express or implied.
+# see the license for the specific language governing permissions and
+# limitations under the license.
+
+import unittest
+import os
+import sys
+import argparse
+import shutil
+import logging
+import struct
+import six
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.contrib.slim.core import Compressor
+
+logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.INFO)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
+    parser.add_argument(
+        '--infer_model',
+        type=str,
+        default='',
+        help='infer_model is used to load an original fp32 ProgramDesc with fp32 weights'
+    )
+    parser.add_argument('--infer_data', type=str, default='', help='data file')
+    parser.add_argument(
+        '--int8_model_save_path',
+        type=str,
+        default='./output',
+        help='int8_model_save_path is used to save an int8 ProgramDesc with fp32 weights')
+    parser.add_argument(
+        '--warmup_batch_size',
+        type=int,
+        default=100,
+        help='batch size for quantization warmup')
+    parser.add_argument(
+        '--accuracy_diff_threshold',
+        type=float,
+        default=0.01,
+        help='accepted accuracy drop threshold.')
+
+    test_args, args = parser.parse_known_args(namespace=unittest)
+
+    return test_args, sys.argv[:1] + args
+
+
+class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
+    """
+    Test the Python API of the MKL-DNN post-training int8 quantization strategy.
+    """
+
+    def _reader_creator(self, data_file='data.bin', cycle=False):
+        def reader():
+            with open(data_file, 'rb') as fp:
+                num = fp.read(8)
+                num = struct.unpack('q', num)[0]
+                imgs_offset = 8
+                img_ch = 3
+                img_w = 224
+                img_h = 224
+                img_pixel_size = 4
+                img_size = img_ch * img_h * img_w * img_pixel_size
+                label_size = 8
+                labels_offset = imgs_offset + num * img_size
+                step = 0
+
+                while step < num:
+                    fp.seek(imgs_offset + img_size * step)
+                    img = fp.read(img_size)
+                    img = struct.unpack_from('{}f'.format(img_ch * img_w *
+                                                          img_h), img)
+                    img = np.array(img)
+                    img.shape = (img_ch, img_w, img_h)
+                    fp.seek(labels_offset + label_size * step)
+                    label = fp.read(label_size)
+                    label = struct.unpack('q', label)[0]
+                    yield img, int(label)
+                    step += 1
+                    if cycle and step == num:
+                        step = 0
+
+        return reader
+
+    def _update_config_file(self, fp32_model_path, output_path):
+        config_path = './quantization/config_mkldnn_int8.yaml'
+        new_config_path = './quantization/temp.yaml'
+        shutil.copy(config_path, new_config_path)
+
+        with open(new_config_path, 'r+') as fp:
+            data = fp.read()
+            data = data.replace('MODEL_PATH', fp32_model_path)
+            data = data.replace('OUTPUT_PATH', output_path)
+        with open(new_config_path, 'w') as fp:
+            fp.write(data)
+
+        return new_config_path
+
+    def _predict(self, test_reader=None, model_path=None):
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        inference_scope = fluid.executor.global_scope()
+        with fluid.scope_guard(inference_scope):
+            if os.path.exists(os.path.join(model_path, '__model__')):
+                [inference_program, feed_target_names,
+                 fetch_targets] = fluid.io.load_inference_model(model_path, exe)
+            else:
+                [inference_program, feed_target_names,
+                 fetch_targets] = fluid.io.load_inference_model(
+                     model_path, exe, 'model', 'params')
+
+            dshape = [3, 224, 224]
+            top1 = 0.0
+            top5 = 0.0
+            total_samples = 0
+            for _, data in enumerate(test_reader()):
+                if six.PY2:
+                    images = map(lambda x: x[0].reshape(dshape), data)
+                if six.PY3:
+                    images = list(map(lambda x: x[0].reshape(dshape), data))
+                images = np.array(images).astype('float32')
+                labels = np.array([x[1] for x in data]).astype("int64")
+                labels = labels.reshape([-1, 1])
+                out = exe.run(inference_program,
+                              feed={
+                                  feed_target_names[0]: images,
+                                  feed_target_names[1]: labels
+                              },
+                              fetch_list=fetch_targets)
+                top1 += np.sum(out[1]) * len(data)
+                top5 += np.sum(out[2]) * len(data)
+                total_samples += len(data)
+            return top1 / total_samples, top5 / total_samples
+
+    def _warmup(self, reader=None, config_path=''):
+        com_pass = Compressor(
+            place=None,
+            scope=None,
+            train_program=None,
+            train_reader=None,
+            train_feed_list=[],
+            train_fetch_list=[],
+            eval_program=None,
+            eval_reader=reader,
+            eval_feed_list=[],
+            eval_fetch_list=[],
+            teacher_programs=[],
+            checkpoint_path='',
+            train_optimizer=None,
+            distiller_optimizer=None)
+        com_pass.config(config_path)
+        com_pass.run()
+
+    def test_compression(self):
+        if not fluid.core.is_compiled_with_mkldnn():
+            return
+
+        int8_model_path = test_case_args.int8_model_save_path
+        data_path = test_case_args.infer_data
+        fp32_model_path = test_case_args.infer_model
+        batch_size = test_case_args.batch_size
+
+        warmup_batch_size = test_case_args.warmup_batch_size
+        accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
+
+        _logger.info(
+            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'.
+            format(batch_size, warmup_batch_size))
+
+        # warmup dataset, only the first batch is used
+        warmup_reader = paddle.batch(
+            self._reader_creator(data_path, False),
+            batch_size=warmup_batch_size)
+        config_path = self._update_config_file(fp32_model_path, int8_model_path)
+        self._warmup(warmup_reader, config_path)
+
+        _logger.info('--- INT8 prediction start ---')
+        val_reader = paddle.batch(
+            self._reader_creator(data_path, False), batch_size=batch_size)
+        int8_model_result = self._predict(val_reader, int8_model_path)
+        _logger.info('--- FP32 prediction start ---')
+        val_reader = paddle.batch(
+            self._reader_creator(data_path, False), batch_size=batch_size)
+        fp32_model_result = self._predict(val_reader, fp32_model_path)
+
+        _logger.info('--- comparing outputs ---')
+        _logger.info('Avg top1 INT8 accuracy: {0:.4f}'.format(int8_model_result[
+            0]))
+        _logger.info('Avg top1 FP32 accuracy: {0:.4f}'.format(fp32_model_result[
+            0]))
+        _logger.info('Accepted accuracy drop threshold: {0}'.format(
+            accuracy_diff_threshold))
+        assert fp32_model_result[0] - int8_model_result[
+            0] <= accuracy_diff_threshold
+
+
+if __name__ == '__main__':
+    global test_case_args
+    test_case_args, remaining_args = parse_args()
+    unittest.main(argv=remaining_args)
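Editor's note (not part of the patch): in practice the new MKLDNNPostTrainingQuantStrategy is driven through the slim Compressor, exactly as the unit test above wires it up. A minimal sketch of that wiring, with the sample reader and the YAML path as placeholders:

```python
import paddle
from paddle.fluid.contrib.slim.core import Compressor

# A reader yielding (image, label) samples, batched to form the warmup batch.
warmup_reader = paddle.batch(my_sample_reader, batch_size=100)  # placeholder reader

com_pass = Compressor(
    place=None,
    scope=None,
    train_program=None,
    train_reader=None,
    train_feed_list=[],
    train_fetch_list=[],
    eval_program=None,
    eval_reader=warmup_reader,
    eval_feed_list=[],
    eval_fetch_list=[],
    teacher_programs=[],
    checkpoint_path='',
    train_optimizer=None,
    distiller_optimizer=None)
com_pass.config('config_mkldnn_int8.yaml')  # YAML listing MKLDNNPostTrainingQuantStrategy
com_pass.run()
```

Outside of CMake, the test itself can be launched directly with the same flags that inference_analysis_python_api_int8_test passes; a sketch with placeholder paths for a downloaded int8v2 data/model set:

```python
import subprocess
import sys

# Paths are placeholders; they must point at an existing model dir and data.bin.
subprocess.check_call([
    sys.executable, "test_mkldnn_int8_quantization_strategy.py",
    "--infer_model", "int8v2/mobilenet/model",
    "--infer_data", "int8v2/data.bin",
    "--int8_model_save_path", "int8_models/test_slim_int8_mobilenet",
    "--warmup_batch_size", "100",
    "--batch_size", "50",
])
```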