diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index bda2f74205d29895a1022d605cb050d5774f1eee..e081070eb8aed343b08b38ae64049447d52609b5 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -403,9 +403,6 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
-paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
-paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5'))
 paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
 paddle.fluid.contrib.Compressor.__init__ (ArgSpec(args=['self', 'place', 'scope', 'train_program', 'train_reader', 'train_feed_list', 'train_fetch_list', 'eval_program', 'eval_reader', 'eval_feed_list', 'eval_fetch_list', 'teacher_programs', 'checkpoint_path', 'train_optimizer', 'distiller_optimizer', 'search_space'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, [], None, None, None, None)), ('document', 'c195b3bba26169cff9439e8c467557c0'))
diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py
index 72437c0138fba692ea1e202c19fe2b5a75f11080..ff478200aefa5524b7cfb70996ba9e3ee50db6f2 100644
--- a/python/paddle/fluid/contrib/__init__.py
+++ b/python/paddle/fluid/contrib/__init__.py
@@ -22,8 +22,6 @@ from . import op_frequence
 from .op_frequence import *
 from . import quantize
 from .quantize import *
-from . import int8_inference
-from .int8_inference import *
 from . import reader
 from .reader import *
 from . import slim
@@ -44,7 +42,6 @@ __all__ += decoder.__all__
 __all__ += memory_usage_calc.__all__
 __all__ += op_frequence.__all__
 __all__ += quantize.__all__
-__all__ += int8_inference.__all__
 __all__ += reader.__all__
 __all__ += slim.__all__
 __all__ += utils.__all__
diff --git a/python/paddle/fluid/contrib/int8_inference/README.md b/python/paddle/fluid/contrib/int8_inference/README.md
deleted file mode 100644
index 7dc7c8d2a374a1d589ccb072b5bf6cce1f6ddda7..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/contrib/int8_inference/README.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# Offline INT8 Calibration Tool
-
-PaddlePaddle supports offline INT8 calibration to accelerate inference. This document explains how to enable INT8 calibration and reports the accuracy results for ResNet-50 and MobileNet-V1.
-
-## 0. Prerequisite
-You need the PaddlePaddle 1.3 (or later) Python package: `pip install paddlepaddle==1.3`.
-
-## 1. How to generate INT8 model
-You can refer to the unit test in [test_calibration_resnet50.py](../tests/test_calibration_resnet50.py). Basically, there are three steps:
-* Construct the calibration object.
-
-```python
-calibrator = int8_utility.Calibrator( # Step 1
-    program=infer_program, # required, FP32 program
-    pretrained_model=model_path, # required, FP32 pretrained model
-    algo=algo, # required, calibration algorithm; default is max, the alternative is KL (Kullback–Leibler divergence)
-    exe=exe, # required, executor
-    output=int8_model, # required, INT8 model
-    feed_var_names=feed_dict, # required, feed dict
-    fetch_list=fetch_targets) # required, fetch targets
-```
-
-* Call calibrator.sample_data() after each executor run.
-```python
-_, acc1, _ = exe.run(
-    program,
-    feed={feed_dict[0]: image,
-          feed_dict[1]: label},
-    fetch_list=fetch_targets)
-
-calibrator.sample_data() # Step 2
-```
-
-* Call calibrator.save_int8_model() after sampling over the specified iterations (e.g., iterations = 50).
-```python
-calibrator.save_int8_model() # Step 3
-```
-
-## 2. How to run INT8 model
-You can load the INT8 model with the load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference the same way as [FP32](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/object_detection/eval.py "FP32").
-
-```python
-[infer_program, feed_dict,
- fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-```
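-
-As a minimal sketch (assuming `image` and `label` batches come from your own
-reader and `exe` is a CPU executor), INT8 inference then looks like:
-
-```python
-out = exe.run(infer_program,
-              feed={feed_dict[0]: image,
-                    feed_dict[1]: label},
-              fetch_list=fetch_targets)
-```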
-
-## 3. Result
-We provide accuracy and performance results measured on Intel(R) Xeon(R) Gold 6271 (single core).
-
-**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
-
-| Model | Dataset | FP32 Accuracy | INT8 Accuracy | Accuracy Diff |
-| :------------: | :------------: | :------------: | :------------: | :------------: |
-| ResNet-50 | Full ImageNet Val | 76.63% | 76.23% | 0.40% |
-| MobileNet-V1 | Full ImageNet Val | 70.78% | 70.47% | 0.31% |
-
-**II. Throughput on Intel(R) Xeon(R) Gold 6271 (batch size 1 on single core)**
-
-| Model | Dataset | FP32 Throughput | INT8 Throughput | Ratio(INT8/FP32) |
-| :------------: | :------------: | :------------: | :------------: | :------------: |
-| ResNet-50 | Full ImageNet Val | 11.54 images/s | 32.2 images/s | 2.79 |
-| MobileNet-V1 | Full ImageNet Val | 49.21 images/s | 108.37 images/s | 2.2 |
-
-Please note that the [full ImageNet validation dataset](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar "full ImageNet validation dataset") can be downloaded by the script `test_calibration.py` with `DATASET=full`.
-
-Notes:
-* The accuracy measurement requires the model with `label`.
-* The theoretical INT8 speedup is 4X on Intel® Xeon® Cascade Lake servers (see `The theoretical peak compute gains are 4x int8 OPS over fp32 OPS.` in the [Reference](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training "Reference")). Therefore the op-level gain is 4X, while the topology-level gain is smaller.
-
-## 4. How to reproduce the results
-* Small dataset for ResNet-50 (single core)
-```bash
-FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
->Note: Change `test_calibration_resnet50.py` to `test_calibration_mobilenetv1.py` for MobileNet-V1. The same applies to the following commands.
-
-* Full dataset for ResNet-50 (single core)
-```bash
-FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
-
-* Full dataset for ResNet-50 (multi-core)
-```bash
-FLAGS_use_mkldnn=true OMP_NUM_THREADS=20 DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
-> Note: This example runs on 20 cores by setting `OMP_NUM_THREADS`.
diff --git a/python/paddle/fluid/contrib/int8_inference/__init__.py b/python/paddle/fluid/contrib/int8_inference/__init__.py
deleted file mode 100644
index 45547201d598c809f7dcf3a1a09103ae5de3e4c6..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/contrib/int8_inference/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from . import utility
-from .utility import *
-
-__all__ = utility.__all__
diff --git a/python/paddle/fluid/contrib/int8_inference/utility.py b/python/paddle/fluid/contrib/int8_inference/utility.py
deleted file mode 100644
index 605dfdf53d0cb44972defcc3f86aa95982b82e41..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/contrib/int8_inference/utility.py
+++ /dev/null
@@ -1,736 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.fluid import core
-import numpy as np
-import math
-import os
-from paddle.fluid.executor import global_scope
-from paddle.fluid import io
-
-__all__ = ['Calibrator']
-
-
-class Calibrator(object):
-    '''
-    The Calibrator class transforms the program and writes the calculated
-    scales into it. This is the INT8 v1 calibration tool, mainly supporting
-    ResNet-50 and MobileNet.
-    '''
-    # TODO(guomingz): Below op list will be updated once more INT8 op kernels are supported.
-    non_conv_int8_op_type = ("pool2d")
-    supported_int8_op_type = ("conv2d", "pool2d")
-    const_sign_op_type = ('pool2d', 'reshape', 'concat', 'transpose')
-    u8_max = 255
-    s8_max = 127
-
-    def __init__(self, *args, **kwargs):
-        self.program = kwargs['program']
-        self.pretrained_model = kwargs['pretrained_model']
-        self.debug = kwargs['debug'] if 'debug' in kwargs else False
-        self.algo = kwargs['algo']
-        self.output = kwargs['output']
-        self.feed_var_names = kwargs['feed_var_names']
-        self.fetch_list = kwargs['fetch_list']
-        self.exe = kwargs['exe']
-
-        self._conv_input_var_name = []
-        self._conv_output_var_name = []
-        self._pool2d_output_var_name = []
-        self._weights_var_name = []
-        self._residual_input_var_name = []
-        self._int8_output_var_op_index_dict = {}
-        self._conv_op_index = [
-            index for index, value in enumerate(self.program.global_block().ops)
-            if value.type == 'conv2d'
-        ]
-
-        self._var_max_value_map = {}
-        self._var_max_range = {}
-        self._weights_scaling_factor = {}
-        self._u8_output_var = []
-        self._s8_output_var = []
-        self._persistable_vars = []
-        self._sampling_data = {}
-
-        self.__init_analysis()
-        self.__generate_output_program()
-
-    def save_int8_model(self):
-        self.__sampling(self._sampling_data)
-        self.__save_scale()
-        self.__update_program()
-        self.__update_output_program_attr()
-        self.__display_debug()
-        self.__save_offline_model()
-
-    def sample_data(self):
-        '''
-        Sample the tensor data of the sampling variables.
-        '''
-        for i in self.sampling_program.list_vars():
-            if i.name in self.sampling_vars:
-                np_data = np.array(global_scope().find_var(i.name).get_tensor())
-                if i.name not in self._sampling_data:
-                    self._sampling_data[i.name] = []
-                self._sampling_data[i.name].append(np_data)
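-
-    # Typical calibration flow (a sketch mirroring the README; `exe`,
-    # `feed_dict` and `fetch_targets` come from fluid.io.load_inference_model):
-    #     calibrator = Calibrator(program=infer_program, ...)  # step 1
-    #     for batch in reader():
-    #         exe.run(calibrator.sampling_program, feed=..., fetch_list=fetch_targets)
-    #         calibrator.sample_data()                         # step 2
-    #     calibrator.save_int8_model()                         # step 3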
-    def __save_offline_model(self):
-        '''
-        Save the quantized model to the disk.
-        '''
-        io.save_inference_model(self.output, self.feed_var_names,
-                                self.fetch_list, self.exe,
-                                self.sampling_program)
-
-    def __display_debug(self):
-        if self.debug:
-            self.__dot(self._output_program)
-            print(self._output_program)
-
-    def __get_max_range_by_var_name(self, program, var_name):
-        """
-        Check whether the specified variable was generated by a Relu layer.
-        If the variable is the output of one of pool2d/reshape/concat/
-        transpose, we keep tracing its ancestors; if it is the output of a
-        conv op, we check the op's fuse_relu attr; otherwise we return
-        Calibrator.s8_max as the default value.
-        Returns:
-            Calibrator.u8_max if the variable was generated by Relu,
-            otherwise Calibrator.s8_max.
-        """
-        search_end_index = -1
-        input_index_name = {}
-        output_index_name = {}
-        ops_type = []
-
-        for index, op in enumerate(program.current_block().ops):
-            ops_type.append(op.type)
-
-            input_index_name[index] = op.input_arg_names
-
-            output_index_name[index] = op.output_arg_names
-            if var_name in op.output_arg_names:
-                search_end_index = index
-
-        # analysis
-        while search_end_index >= 0:
-            if ops_type[search_end_index] == "relu":
-                return Calibrator.u8_max
-
-            input_name = input_index_name[search_end_index][0]
-
-            for i in output_index_name.keys():
-                if input_name in output_index_name[i]:
-                    search_end_index = i
-                    break
-
-            if ops_type[
-                    search_end_index] not in Calibrator.const_sign_op_type and ops_type[
-                        search_end_index] != 'conv2d':
-                return Calibrator.s8_max
-
-            if ops_type[search_end_index] != 'conv2d':
-                continue
-
-            if program.current_block().ops[search_end_index].has_attr(
-                    'fuse_relu') and program.current_block().ops[
-                        search_end_index].attr('fuse_relu'):
-                return Calibrator.u8_max
-            else:
-                return Calibrator.s8_max
-
-        return Calibrator.s8_max
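-
-    # Illustration (assumed topology): for conv2d(fuse_relu=True) -> pool2d
-    # -> X, tracing X's ancestors reaches the conv and yields u8_max (255),
-    # since the activation is non-negative; a signed activation falls back
-    # to s8_max (127).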
-
-    def __check_op_type_with_specified_var_as_input(self,
-                                                    program,
-                                                    var_name,
-                                                    start_index=0):
-        '''
-        Check the types of all ops that use the specified variable as input.
-        If any of those ops is not int8-enabled, return False.
-        '''
-        op_type_list = [
-            op.type for op in program.current_block().ops[start_index:]
-            if var_name in op.input_arg_names
-        ]
-        for i in op_type_list:
-            if not i in Calibrator.supported_int8_op_type:
-                return False
-        return True
-
-    def __check_var_source_dt(self, var_name):
-        '''
-        Check whether the specified variable is the output of an int8 conv op.
-        If so, return the op index; otherwise return -1.
-        '''
-        return self._int8_output_var_op_index_dict[
-            var_name] if var_name in self._int8_output_var_op_index_dict else -1
-
-    def __update_int8_output_var_op_index_dict(self, index, var_name=None):
-        '''
-        Update the int8_output_variable/op_index dictionary.
-        '''
-        for k, v in self._int8_output_var_op_index_dict.items():
-            if v >= index:
-                self._int8_output_var_op_index_dict[k] = v + 1
-        if var_name:
-            self._int8_output_var_op_index_dict[var_name] = index
-
-    def __update_program(self):
-        '''
-        Update the program with the quantize/dequantize op insertion.
-        '''
-        quantize_index, dequantize_index = self.__get_quantize_dequantize_combination(
-            self._output_program)
-        inserted_op_length = 0
-        calc_max_func = self.__get_optimal_scaling_factor if self.algo == "KL" else np.max
-        insert_op_collection = sorted(quantize_index + dequantize_index)
-
-        for index in insert_op_collection:
-            if index in quantize_index:
-                quantize_tmp = self._output_program.current_block().create_var(
-                    name="quantize_{}_tmp".format(index),
-                    dtype=core.VarDesc.VarType.UINT8)
-                original_out_name = self._output_program.current_block().ops[
-                    index + inserted_op_length - 1].output_names[0]
-                original_out = self._output_program.current_block().ops[
-                    index + inserted_op_length - 1].output(original_out_name)[0]
-
-                op = self._output_program.current_block()._insert_op(
-                    index=index + inserted_op_length,
-                    type="quantize",
-                    inputs={"Input": original_out},
-                    outputs={"Output": quantize_tmp}, )
-
-                op._set_attr("data_format", "MKLDNNLAYOUT")
-                op._set_attr("use_mkldnn", 1)
-                op._set_attr(
-                    "Scale", self._var_max_range[original_out] /
-                    calc_max_func(self._var_max_value_map[original_out]))
-
-                if self.__get_max_range_by_var_name(
-                        self._output_program,
-                        original_out) == Calibrator.s8_max:
-                    op._set_attr("is_negative_input", 1)
-
-                self.__update_int8_output_var_op_index_dict(
-                    index + inserted_op_length, "quantize_{}_tmp".format(index))
-
-                inserted_op_length += 1
-                for op in self._output_program.current_block().ops[
-                        index + inserted_op_length:]:
-                    for j in op.input_names:
-                        if op.input(j) and op.input(
-                                j
-                        )[0] == original_out and op.type in Calibrator.supported_int8_op_type:
-                            op.desc.set_input(j,
-                                              ["{}".format(quantize_tmp.name)])
-            else:
-                start_index = index + inserted_op_length
-                dequantize_tmp_var = self._output_program.current_block(
-                ).create_var(
-                    name="dequantize_{}_tmp".format(index + 1),
-                    dtype="float32", )
-                original_out_var = None
-
-                for original_input in self._output_program.current_block().ops[
-                        start_index].input_arg_names:
-                    index_res = self.__get_op_index_by_output_var(
-                        self._output_program, original_input)
-                    if index_res != -1:
-                        original_out_var = original_input
-                        break
-
-                if original_out_var:
-                    op = self._output_program.current_block()._insert_op(
-                        index=start_index,
-                        type="dequantize",
-                        inputs={"Input": original_out_var},
-                        outputs={"Output": dequantize_tmp_var})
-                    op._set_attr("data_format", "MKLDNNLAYOUT")
-                    op._set_attr("use_mkldnn", 1)
-                    op._set_attr("Scale", self._var_max_range[original_out_var]
-                                 / calc_max_func(self._var_max_value_map[
-                                     original_out_var]))
-
-                    for op_index in range(
-                            start_index + 1,
-                            len(self._output_program.current_block().ops)):
-                        if self._output_program.current_block(
-                        ).ops[op_index].type == "conv2d" and self._output_program.current_block(
-                        ).ops[op_index].attr("force_fp32_output"):
-                            continue
-                        else:
-                            for j in self._output_program.current_block().ops[
-                                    op_index].input_names:
-                                if len(self._output_program.current_block().ops[
-                                        op_index].input(j)
-                                       ) and self._output_program.current_block(
-                                       ).ops[op_index].input(j)[
-                                           0] == original_out_var:
-                                    self._output_program.current_block(
-                                    ).ops[op_index].desc.set_input(
-                                        j,
-                                        ["{}".format(dequantize_tmp_var.name)])
-
-                inserted_op_length += 1
-
-                op._set_attr("data_format", "MKLDNNLAYOUT")
-                op._set_attr("use_mkldnn", 1)
-
-    def __update_output_program_attr(self):
-        for i in self._output_program.list_vars():
-            if i.name in self._persistable_vars:
-                i.persistable = False
-                os.system("rm -rf {}/{}".format(self.pretrained_model, i.name))
-
-        for i in self._u8_output_var:
-            self._output_program.current_block().var(i).desc.set_dtype(
-                core.VarDesc.VarType.UINT8)
-
-        for i in self._s8_output_var:
-            self._output_program.current_block().var(i).desc.set_dtype(
-                core.VarDesc.VarType.INT8)
-
-    @property
-    def sampling_program(self):
-        return self._output_program
-
-    @property
-    def sampling_vars(self):
-        return self._weights_var_name + self._conv_input_var_name + self._conv_output_var_name + self._residual_input_var_name + self._pool2d_output_var_name
-
-    def _is_close(self, a, b, rel_tol=1e-09, abs_tol=0.0):
-        return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
-
-    def __generate_output_program(self):
-        for i in self.program.list_vars():
-            if not i.persistable and i.name in self.sampling_vars:
-                i.persistable = True
-                self._persistable_vars.append(i.name)
-
-        self._output_program = self.program.clone()
-
-    def __save_scale(self):
-        '''
-        Update the convolution scale information.
-        '''
-        func = self.__get_optimal_scaling_factor if self.algo == 'KL' else np.max
-        for i in self._conv_op_index[1:]:
-            weights_var_name = self.program.current_block().ops[i].input(
-                'Filter')[0]
-            input_var_name = self.program.current_block().ops[i].input('Input')[
-                0]
-            output_var_name = self.program.current_block().ops[i].output(
-                'Output')[0]
-            self._output_program.current_block().ops[i]._set_attr(
-                "Scale_weights", self._weights_scaling_factor[weights_var_name])
-
-            self._output_program.current_block().ops[i]._set_attr(
-                "Scale_in", self._var_max_range[input_var_name] /
-                func(self._var_max_value_map[input_var_name]))
-            self._output_program.current_block().ops[i]._set_attr(
-                "Scale_out", self._var_max_range[output_var_name] /
-                func(self._var_max_value_map[output_var_name]))
-            if self._output_program.current_block().ops[i].desc.input(
-                    "ResidualData"):
-                residual_var_name = self._output_program.current_block().ops[
-                    i].desc.input("ResidualData")[0]
-                self._output_program.current_block().ops[i]._set_attr(
-                    "Scale_in_eltwise", self._var_max_range[residual_var_name] /
-                    func(self._var_max_value_map[residual_var_name]))
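-
-    # Numeric sketch (assumed values): with algo='max', an unsigned (u8)
-    # activation whose sampled |max| is 20.0 gets Scale_in = 255 / 20.0 =
-    # 12.75; with algo='KL' the divisor is the KL-optimal threshold instead
-    # of the raw maximum.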
-
-    def __sampling(self, sampling_data):
-        '''
-        Sample the data range of the variables.
-        '''
-        for i in self.program.list_vars():
-            if i.name not in self.sampling_vars:
-                continue
-
-            if i.name in self._weights_var_name:
-                scaling_factor_per_channel = []
-                data = sampling_data[i.name][0]
-                for j in range(data.shape[0]):
-                    var_value = float(np.max(np.abs(data[j])))
-                    if not self._is_close(var_value, 0.0):
-                        scaling_factor_per_channel.append(Calibrator.s8_max /
                                                          var_value)
-                    else:
-                        scaling_factor_per_channel.append(0.0)
-                self._weights_scaling_factor[
-                    i.name] = scaling_factor_per_channel
-            else:
-                if i.name in self._conv_output_var_name:
-                    op_pos = self.__get_op_index_by_output_var(self.program,
-                                                               i.name)
-                    cur_op = self.program.current_block().ops[op_pos]
-
-                    if cur_op.has_attr('fuse_relu') and cur_op.attr(
-                            'fuse_relu'):
-                        max_range = Calibrator.u8_max
-                        self._u8_output_var.append(i.name)
-                    else:
-                        max_range = Calibrator.s8_max
-                        self._s8_output_var.append(i.name)
-                else:
-                    max_range = self.__get_max_range_by_var_name(self.program,
-                                                                 i.name)
-                max_value = [[np.abs(np_data)]
-                             for np_data in sampling_data[i.name]]
-
-                self._var_max_range[i.name] = max_range
-                self._var_max_value_map[i.name] = max_value
-
-    def __check_force_fp32_attr_by_output_var(self, program, var_name):
-        for op in program.current_block().ops:
-            if op.type == "conv2d" and var_name in op.output_arg_names:
-                return op.attr("force_fp32_output")
-        return False
-
-    def __get_op_index_by_output_var(self, program, var_name, start_index=0):
-        '''
-        Check whether the specified variable is the output of a conv/pool2d
-        op.
-
-        Returns:
-            The op index if the variable is the output of any conv/pool2d op,
-            otherwise -1.
-        '''
-        for index, op in enumerate(program.current_block().ops[start_index:]):
-            if var_name in op.output_arg_names and op.type in Calibrator.supported_int8_op_type:
-                return index
-        return -1
-
-    def __get_op_index_by_input_var(self, program, var_name, start_index=0):
-        '''
-        Get the op index by the specified input variable.
-        Returns:
-            The op index if the variable is an input of that op, or -1 if the
-            variable is not an input of any op.
-        '''
-        for index, op in enumerate(program.current_block().ops[start_index:]):
-            if var_name in op.input_arg_names:
-                return index
-
-        return -1
-
-    def __get_quantize_dequantize_combination(self, program):
-        """
-        Get the quantize/dequantize op indices for later insertion.
-        Args:
-            program: the program desc.
-        Returns:
-            Two lists containing the quantize op and dequantize op indices.
-        """
-        quantize_op_index = []
-        dequantize_op_index = []
-        minimal_conv_count = 2  # there must be at least two conv ops if the first conv is not int8-enabled.
-        if len(self._conv_op_index) < minimal_conv_count:
-            return [], []
-
-        for index, value in enumerate(self._conv_op_index):
-            if index == 0:
-                quantize_op_index.append(self._conv_op_index[index + 1])
-            elif index == len(self._conv_op_index) - 1:
-                output_var = program.current_block().ops[value].output(
-                    "Output")[0]
-                if self.__check_op_type_with_specified_var_as_input(
-                        program, output_var, index):
-                    dequantize_op_index.append(self._conv_op_index[index] + 2)
-                else:
-                    program.current_block().ops[value]._set_attr(
-                        "force_fp32_output", True)
-
-            elif self._conv_op_index[index] + 1 < self._conv_op_index[index +
-                                                                      1]:
-
-                program.current_block().ops[self._conv_op_index[
-                    index]]._set_attr("force_fp32_output", True)
-
-                for op_index in range(self._conv_op_index[index + 1],
-                                      self._conv_op_index[index], -1):
-                    op_type = program.current_block().ops[op_index].type
-                    op_has_int8_input = False
-                    input_var_name = None
-                    input_length = len(program.current_block().ops[op_index]
-                                       .input_arg_names)
-
-                    for var_name in program.current_block().ops[
-                            op_index].input_arg_names:
-                        if self.__check_var_source_dt(var_name) != -1:
-                            op_has_int8_input = True
-                            input_var_name = var_name
-                            break
-
-                    if op_has_int8_input:
-                        if op_type == "conv2d":
-                            if program.current_block().ops[op_index +
-                                                           1].type == "conv2d":
-                                continue
-                            elif program.current_block(
-                            ).ops[op_index +
-                                  1].type in Calibrator.non_conv_int8_op_type:
-                                dequantize_op_index.append(op_index + 2)
-                                break
-                            else:
-                                program.current_block().ops[op_index]._set_attr(
-                                    "force_fp32_output", True)
-                                continue
-                        elif not self.__check_force_fp32_attr_by_output_var(
-                                program, input_var_name
-                        ) and op_index not in dequantize_op_index:
-                            share_input_flag = True
-                            for input_attr_name in program.current_block().ops[
-                                    op_index].input_names:
-                                input_var_name = program.current_block().ops[
-                                    op_index].input(input_attr_name)[0]
-                                cousin_op_index = self.__get_op_index_by_input_var(
-                                    program, input_var_name)
-                                if cousin_op_index != -1 and cousin_op_index in dequantize_op_index:
-                                    share_input_flag = False
-                                    break
-                            if share_input_flag:
-                                dequantize_op_index.append(op_index)
-
-                    elif input_length:
-                        output_is_to_int8_op = False
-                        share_input_flag = True
-                        for var_name in program.current_block().ops[
-                                op_index].input_arg_names:
-                            if not self.__check_op_type_with_specified_var_as_input(
-                                    program, var_name):
-                                share_input_flag = False
-                                break
-
-                        for var_name in program.current_block().ops[
-                                op_index].output_arg_names:
-                            if self.__get_op_index_by_output_var(
-                                    program, var_name, op_index) != -1:
-                                output_is_to_int8_op = True
-                                break
-
-                        if share_input_flag or output_is_to_int8_op:
-                            quantize_op_index.append(op_index)
-
-        return quantize_op_index, dequantize_op_index
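-
-    # Illustration (assumed two-conv program: conv2d, conv2d, pool2d at
-    # indices 0-2): the first conv stays FP32, so a quantize op is requested
-    # before the second conv and a dequantize op after the trailing pool2d.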
-
-    def __init_analysis(self):
-        '''
-        Collect the variable names for sampling.
-        '''
-        start_index = 1  # analyze the conv op details starting from the second conv op.
-
-        for i in self._conv_op_index[start_index:]:
-            self._weights_var_name.append(self.program.current_block().ops[i]
-                                          .input('Filter')[0])
-            self._conv_input_var_name.append(self.program.current_block().ops[i]
-                                             .input('Input')[0])
-            self._conv_output_var_name.append(self.program.current_block().ops[
-                i].output('Output')[0])
-            self._int8_output_var_op_index_dict[self.program.current_block()
-                                                .ops[i].output('Output')[0]] = i
-            if self.program.current_block().ops[i].desc.input("ResidualData"):
-                self._residual_input_var_name.append(self.program.current_block(
-                ).ops[i].desc.input("ResidualData")[0])
-
-            if self.program.current_block().ops[i + 1].type == "pool2d":
-                self._pool2d_output_var_name.append(self.program.current_block(
-                ).ops[i + 1].output('Out')[0])
-
-    def __expand_quantized_bins(self, quantized_bins, reference_bins):
-        expanded_quantized_bins = [0] * len(reference_bins)
-        num_merged_bins = len(reference_bins) / len(quantized_bins)
-        j_start = 0
-        j_end = num_merged_bins
-        for idx in xrange(len(quantized_bins)):
-            zero_count = reference_bins[j_start:j_end].count(0)
-            num_merged_bins = j_end - j_start
-            if zero_count == num_merged_bins:
-                avg_bin_ele = 0
-            else:
-                avg_bin_ele = quantized_bins[idx] / (
-                    num_merged_bins - zero_count + 0.0)
-            for idx1 in xrange(j_start, j_end):
-                expanded_quantized_bins[idx1] = (0 if reference_bins[idx1] == 0
-                                                 else avg_bin_ele)
-            j_start += num_merged_bins
-            j_end += num_merged_bins
-            if (idx + 1) == len(quantized_bins) - 1:
-                j_end = len(reference_bins)
-        return expanded_quantized_bins
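-
-    # Worked example: quantized_bins=[10, 20] with reference_bins=[1, 0, 2, 3]
-    # merges two reference bins per quantized bin; bin 0 spreads its mass over
-    # its one non-zero slot and bin 1 over two, giving [10, 0, 10, 10].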
-
-    def __safe_entropy(self, reference_distr_P, P_sum, candidate_distr_Q,
-                       Q_sum):
-        '''
-        Calculate the relative entropy (KL divergence) between the two
-        distributions.
-        '''
-        assert len(reference_distr_P) == len(candidate_distr_Q)
-        tmp_sum1 = 0
-        tmp_sum2 = 0
-        for idx in range(len(reference_distr_P)):
-            p_idx = reference_distr_P[idx]
-            q_idx = candidate_distr_Q[idx]
-            if p_idx == 0:
-                tmp_sum1 += 0
-                tmp_sum2 += 0
-            else:
-                if q_idx == 0:
-                    print("Fatal error!, idx = " + str(idx) +
-                          " qindex = 0! p_idx = " + str(p_idx))
-                tmp_sum1 += p_idx * (math.log(Q_sum * p_idx))
-                tmp_sum2 += p_idx * (math.log(P_sum * q_idx))
-        return (tmp_sum1 - tmp_sum2) / P_sum
-
-    # Reference: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
-    def __get_optimal_scaling_factor(self,
-                                     activation_blob,
-                                     num_quantized_bins=255):
-        '''
-        Use the KL-divergence method to get a more precise scaling factor.
-        '''
-        max_val = np.max(activation_blob)
-        min_val = np.min(activation_blob)
-        if min_val >= 0:
-            hist, hist_edeges = np.histogram(
-                activation_blob, bins=2048, range=(min_val, max_val))
-            ending_iter = 2047
-            starting_iter = int(ending_iter * 0.7)
-        else:
-            th = max(abs(max_val), abs(min_val))
-            hist, hist_edeges = np.histogram(
-                activation_blob, bins=2048, range=(-th, th))
-            starting_iter = 0
-            ending_iter = 2047
-            if abs(max_val) > abs(min_val):
-                while starting_iter < ending_iter:
-                    if hist[starting_iter] == 0:
-                        starting_iter += 1
-                        continue
-                    else:
-                        break
-                starting_iter += int((ending_iter - starting_iter) * 0.6)
-            else:
-                while ending_iter > 0:
-                    if hist[ending_iter] == 0:
-                        ending_iter -= 1
-                        continue
-                    else:
-                        break
-                starting_iter = int(0.6 * ending_iter)
-        bin_width = hist_edeges[1] - hist_edeges[0]
-
-        P_sum = len(np.array(activation_blob).ravel())
-        min_kl_divergence = 0
-        min_kl_index = 0
-        kl_inited = False
-        for i in range(starting_iter, ending_iter + 1):
-            reference_distr_P = hist[0:i].tolist()
-            outliers_count = sum(hist[i:2048])
-            if reference_distr_P[i - 1] == 0:
-                continue
-            reference_distr_P[i - 1] += outliers_count
-            reference_distr_bins = reference_distr_P[:]
-            candidate_distr_Q = hist[0:i].tolist()
-            num_merged_bins = i / num_quantized_bins
-            candidate_distr_Q_quantized = [0] * num_quantized_bins
-            j_start = 0
-            j_end = num_merged_bins
-            for idx in xrange(num_quantized_bins):
-                candidate_distr_Q_quantized[idx] = sum(candidate_distr_Q[
-                    j_start:j_end])
-                j_start += num_merged_bins
-                j_end += num_merged_bins
-                if (idx + 1) == num_quantized_bins - 1:
-                    j_end = i
-            candidate_distr_Q = self.__expand_quantized_bins(
-                candidate_distr_Q_quantized, reference_distr_bins)
-            Q_sum = sum(candidate_distr_Q)
-            kl_divergence = self.__safe_entropy(reference_distr_P, P_sum,
-                                                candidate_distr_Q, Q_sum)
-            if not kl_inited:
-                min_kl_divergence = kl_divergence
-                min_kl_index = i
-                kl_inited = True
-            elif kl_divergence < min_kl_divergence:
-                min_kl_divergence = kl_divergence
-                min_kl_index = i
-            else:
-                pass
-        if min_kl_index == 0:
-            while starting_iter > 0:
-                if hist[starting_iter] == 0:
-                    starting_iter -= 1
-                    continue
-                else:
-                    break
-            min_kl_index = starting_iter
-        return (min_kl_index + 0.5) * bin_width
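-
-    # Sketch of how the returned threshold becomes a scale (assumed numbers):
-    # if the KL search picks min_kl_index such that the threshold T is 5.0,
-    # an unsigned activation is later scaled by u8_max / T = 255 / 5.0 = 51.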
-
-    @staticmethod
-    def __dot(program, output_name="model.dot"):
-        '''
-        Generate the graphviz dot file for debugging.
-        '''
-        dot_graph = ""
-        dot_nodes = []
-        dot_edges = []
-        dot_graph += "digraph pm {\n"
-        for block in program.blocks:
-            ops = list(block.ops)
-            for index, op in enumerate(ops):
-                op_type = op.type
-                op_name = op_type + "_" + op.output_arg_names[0].replace(
-                    ".", "_") + "___" + str(index)
-                for name in op.input_arg_names:
-                    name = name.replace(".", "_")
-                    dot_edge = name + " -> " + op_name
-                    if dot_edge not in dot_edges:
-                        dot_edges.append(dot_edge)
-                    dot_node = name + " [shape=oval, style=filled, fillcolor=yellow]"
-                    if dot_node not in dot_nodes:
-                        dot_nodes.append(dot_node)
-
-                for name in op.output_arg_names:
-                    name = name.replace(".", "_")
-                    dot_edge = op_name + " -> " + name
-                    if dot_edge not in dot_edges:
-                        dot_edges.append(dot_edge)
-                    if op_type in Calibrator.supported_int8_op_type:
-                        if op_type == "conv2d" and op.has_attr(
-                                'force_fp32_output') and op.attr(
-                                    "force_fp32_output"):
-                            dot_node = op_name + " [shape=box, style=filled, color=deeppink]"
-                        else:
-                            dot_node = op_name + " [shape=box, style=filled, color=greenyellow]"
-                    elif op_type in ["quantize", "dequantize"]:
-                        dot_node = op_name + " [shape=box, style=filled, color=gold]"
-                    else:
-                        dot_node = op_name + " [shape=box, style=filled, fillcolor=red]"
-
-                    if dot_node not in dot_nodes:
-                        dot_nodes.append(dot_node)
-
-        for dot_edge in dot_edges:
-            dot_graph += dot_edge + "\n"
-        for dot_node in dot_nodes:
-            dot_graph += dot_node + "\n"
-        dot_graph += "}"
-
-        with open(output_name, 'w') as f:
-            f.write(dot_graph)
diff --git a/python/paddle/fluid/contrib/tests/CMakeLists.txt b/python/paddle/fluid/contrib/tests/CMakeLists.txt
index b538e38ab73ea163df3ebe3c8da9356e9071b507..7431b11817894ed002dd3ceb2de661dbe5c76be8 100644
--- a/python/paddle/fluid/contrib/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/tests/CMakeLists.txt
@@ -1,15 +1,6 @@
 file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 
-if(APPLE OR WIN32 OR NOT WITH_MKL)
-    list(REMOVE_ITEM TEST_OPS test_calibration_resnet50)
-    list(REMOVE_ITEM TEST_OPS test_calibration_mobilenetv1)
-endif()
-
 foreach(src ${TEST_OPS})
-    if(src MATCHES "test_calibration_*")
-        py_test(${src} SRCS ${src}.py ENVS FLAGS_use_mkldnn=true FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI})
-    else()
     py_test(${src} SRCS ${src}.py)
-    endif()
 endforeach()
diff --git a/python/paddle/fluid/contrib/tests/test_calibration_mobilenetv1.py b/python/paddle/fluid/contrib/tests/test_calibration_mobilenetv1.py
deleted file mode 100644
index 214d6c7557f9d5194e1913610fd7f7d784c61fed..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/contrib/tests/test_calibration_mobilenetv1.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# copyright (c) 2018 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-# http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import unittest
-import sys
-from test_calibration_resnet50 import TestCalibration
-
-
-class TestCalibrationForMobilenetv1(TestCalibration):
-    def download_model(self):
-        # mobilenetv1 fp32 data
-        data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
-        ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
-        self.model_cache_folder = self.download_data(data_urls, data_md5s,
-                                                     "mobilenetv1_fp32")
-        self.model = "MobileNet-V1"
-        self.algo = "KL"
-
-    def test_calibration(self):
-        self.download_model()
-        print("Start FP32 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (fp32_throughput, fp32_latency,
-         fp32_acc1) = self.run_program(self.model_cache_folder + "/model")
-        print("Start INT8 calibration for {0} on {1} images ...".format(
-            self.model, self.sample_iterations * self.batch_size))
-        self.run_program(
-            self.model_cache_folder + "/model", True, algo=self.algo)
-        print("Start INT8 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (int8_throughput, int8_latency,
-         int8_acc1) = self.run_program(self.int8_model)
-        delta_value = fp32_acc1 - int8_acc1
-        self.assertLess(delta_value, 0.01)
-        print(
-            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, fp32_throughput, fp32_latency,
-                   fp32_acc1))
-        print(
-            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, int8_throughput, int8_latency,
-                   int8_acc1))
-        sys.stdout.flush()
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/fluid/contrib/tests/test_calibration_resnet50.py b/python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
deleted file mode 100644
index a5286e5b0a6858a795bb221ad02f9d466eb7d751..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
+++ /dev/null
@@ -1,326 +0,0 @@
-# copyright (c) 2018 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-# http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import unittest
-import os
-import numpy as np
-import time
-import sys
-import random
-import paddle
-import paddle.fluid as fluid
-import functools
-import contextlib
-from paddle.dataset.common import download
-from PIL import Image, ImageEnhance
-import math
-import paddle.fluid.contrib.int8_inference.utility as int8_utility
-
-random.seed(0)
-np.random.seed(0)
-
-DATA_DIM = 224
-
-THREAD = 1
-BUF_SIZE = 102400
-
-DATA_DIR = 'data/ILSVRC2012'
-
-img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
-img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
-
-
-# TODO(guomingz): Remove duplicated code from resize_short, crop_image, process_image, _reader_creator
-def resize_short(img, target_size):
-    percent = float(target_size) / min(img.size[0], img.size[1])
-    resized_width = int(round(img.size[0] * percent))
-    resized_height = int(round(img.size[1] * percent))
-    img = img.resize((resized_width, resized_height), Image.LANCZOS)
-    return img
-
-
-def crop_image(img, target_size, center):
-    width, height = img.size
-    size = target_size
-    if center == True:
-        w_start = (width - size) / 2
-        h_start = (height - size) / 2
-    else:
-        w_start = np.random.randint(0, width - size + 1)
-        h_start = np.random.randint(0, height - size + 1)
-    w_end = w_start + size
-    h_end = h_start + size
-    img = img.crop((w_start, h_start, w_end, h_end))
-    return img
-
-
-def process_image(sample, mode, color_jitter, rotate):
-    img_path = sample[0]
-
-    img = Image.open(img_path)
-
-    img = resize_short(img, target_size=256)
-    img = crop_image(img, target_size=DATA_DIM, center=True)
-
-    if img.mode != 'RGB':
-        img = img.convert('RGB')
-
-    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
-    img -= img_mean
-    img /= img_std
-
-    return img, sample[1]
-
-
-def _reader_creator(file_list,
-                    mode,
-                    shuffle=False,
-                    color_jitter=False,
-                    rotate=False,
-                    data_dir=DATA_DIR):
-    def reader():
-        with open(file_list) as flist:
-            full_lines = [line.strip() for line in flist]
-            if shuffle:
-                np.random.shuffle(full_lines)
-
-            lines = full_lines
-
-            for line in lines:
-                img_path, label = line.split()
-                img_path = os.path.join(data_dir, img_path)
-                if not os.path.exists(img_path):
-                    continue
-                yield img_path, int(label)
-
-    mapper = functools.partial(
-        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
-
-    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
-
-
-def val(data_dir=DATA_DIR):
-    file_list = os.path.join(data_dir, 'val_list.txt')
-    return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir)
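-
-# Usage sketch (assumes data/ILSVRC2012/val_list.txt exists): each item the
-# reader yields is a (3x224x224 float32 CHW image, int64 label) pair, already
-# normalized with the ImageNet mean/std above.
-#     for img, label in val()():
-#         ...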
-
-
-class TestCalibration(unittest.TestCase):
-    def setUp(self):
-        self.int8_download = 'int8/download'
-        self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
-                                               self.int8_download)
-
-        data_urls = []
-        data_md5s = []
-        self.data_cache_folder = ''
-        if os.environ.get('DATASET') == 'full':
-            data_urls.append(
-                'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa'
-            )
-            data_md5s.append('60f6525b0e1d127f345641d75d41f0a8')
-            data_urls.append(
-                'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
-            )
-            data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "full_data", False)
-        else:
-            data_urls.append(
-                'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz'
-            )
-            data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d')
-            self.data_cache_folder = self.download_data(data_urls, data_md5s,
-                                                        "small_data", False)
-
-        # reader/decorator.py requires the relative path to the data folder
-        cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data",
-                                                   self.data_cache_folder)
-        os.system(cmd)
-
-        self.batch_size = 1 if os.environ.get('DATASET') == 'full' else 50
-        self.sample_iterations = 50 if os.environ.get(
-            'DATASET') == 'full' else 1
-        self.infer_iterations = 50000 if os.environ.get(
-            'DATASET') == 'full' else 1
-
-        self.timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
-        self.int8_model = ''
-
-    def tearDown(self):
-        try:
-            os.system("rm -rf {}".format(self.int8_model))
-        except Exception as e:
-            print("Failed to delete {} due to {}".format(self.int8_model,
-                                                         str(e)))
-
-    def cache_unzipping(self, target_folder, zip_path):
-        if not os.path.exists(target_folder):
-            cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder,
-                                                          zip_path)
-            os.system(cmd)
-
-    def download_data(self, data_urls, data_md5s, folder_name, is_model=True):
-        data_cache_folder = os.path.join(self.cache_folder, folder_name)
-        zip_path = ''
-        if os.environ.get('DATASET') == 'full':
-            file_names = []
-            for i in range(0, len(data_urls)):
-                download(data_urls[i], self.int8_download, data_md5s[i])
-                file_names.append(data_urls[i].split('/')[-1])
-
-            zip_path = os.path.join(self.cache_folder,
-                                    'full_imagenet_val.tar.gz')
-            if not os.path.exists(zip_path):
-                cat_command = 'cat'
-                for file_name in file_names:
-                    cat_command += ' ' + os.path.join(self.cache_folder,
-                                                      file_name)
-                cat_command += ' > ' + zip_path
-                os.system(cat_command)
-
-        if os.environ.get('DATASET') != 'full' or is_model:
-            download(data_urls[0], self.int8_download, data_md5s[0])
-            file_name = data_urls[0].split('/')[-1]
-            zip_path = os.path.join(self.cache_folder, file_name)
-
-        print('Data is downloaded at {0}'.format(zip_path))
-        self.cache_unzipping(data_cache_folder, zip_path)
-        return data_cache_folder
-
-    def download_model(self):
-        pass
-
-    def run_program(self, model_path, generate_int8=False, algo='direct'):
-        image_shape = [3, 224, 224]
-
-        fluid.memory_optimize(fluid.default_main_program())
-
-        exe = fluid.Executor(fluid.CPUPlace())
-
-        [infer_program, feed_dict,
-         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-
-        t = fluid.transpiler.InferenceTranspiler()
-        t.transpile(infer_program, fluid.CPUPlace())
-
-        val_reader = paddle.batch(val(), self.batch_size)
-        iterations = self.infer_iterations
-
-        if generate_int8:
-            self.int8_model = os.path.join(os.getcwd(),
-                                           "calibration_out_" + self.timestamp)
-            iterations = self.sample_iterations
-            try:
-                os.system("mkdir " + self.int8_model)
-            except Exception as e:
-                print("Failed to create {} due to {}".format(self.int8_model,
-                                                             str(e)))
-                sys.exit(-1)
-
-            calibrator = int8_utility.Calibrator(
-                program=infer_program,
-                pretrained_model=model_path,
-                algo=algo,
-                exe=exe,
-                output=self.int8_model,
-                feed_var_names=feed_dict,
-                fetch_list=fetch_targets)
-
-        test_info = []
-        cnt = 0
-        periods = []
-        for batch_id, data in enumerate(val_reader()):
-            image = np.array(
-                [x[0].reshape(image_shape) for x in data]).astype("float32")
-            label = np.array([x[1] for x in data]).astype("int64")
-            label = label.reshape([-1, 1])
-            running_program = calibrator.sampling_program.clone(
-            ) if generate_int8 else infer_program.clone()
-
-            t1 = time.time()
-            _, acc1, _ = exe.run(
-                running_program,
-                feed={feed_dict[0]: image,
-                      feed_dict[1]: label},
-                fetch_list=fetch_targets)
-            t2 = time.time()
-            period = t2 - t1
-            periods.append(period)
-
-            if generate_int8:
-                calibrator.sample_data()
-
-            test_info.append(np.mean(acc1) * len(data))
-            cnt += len(data)
-
-            if (batch_id + 1) % 100 == 0:
-                print("{0} images,".format(batch_id + 1))
-                sys.stdout.flush()
-
-            if (batch_id + 1) == iterations:
-                break
-
-        if generate_int8:
-            calibrator.save_int8_model()
-
-            print(
-                "Calibration is done and the corresponding files are generated at {}".
-                format(os.path.abspath("calibration_out")))
-        else:
-            throughput = cnt / np.sum(periods)
-            latency = np.average(periods)
-            acc1 = np.sum(test_info) / cnt
-            return (throughput, latency, acc1)
-
-
-class TestCalibrationForResnet50(TestCalibration):
-    def download_model(self):
-        # resnet50 fp32 data
-        data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
-        ]
-        data_md5s = ['4a5194524823d9b76da6e738e1367881']
-        self.model_cache_folder = self.download_data(data_urls, data_md5s,
-                                                     "resnet50_fp32")
-        self.model = "ResNet-50"
-        self.algo = "direct"
-
-    def test_calibration(self):
-        self.download_model()
-        print("Start FP32 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (fp32_throughput, fp32_latency,
-         fp32_acc1) = self.run_program(self.model_cache_folder + "/model")
-        print("Start INT8 calibration for {0} on {1} images ...".format(
-            self.model, self.sample_iterations * self.batch_size))
-        self.run_program(
-            self.model_cache_folder + "/model", True, algo=self.algo)
-        print("Start INT8 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (int8_throughput, int8_latency,
-         int8_acc1) = self.run_program(self.int8_model)
-        delta_value = fp32_acc1 - int8_acc1
-        self.assertLess(delta_value, 0.01)
-        print(
-            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, fp32_throughput, fp32_latency,
-                   fp32_acc1))
-        print(
-            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, int8_throughput, int8_latency,
-                   int8_acc1))
-        sys.stdout.flush()
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/setup.py.in b/python/setup.py.in
index a392e230709168b88c38a2dfad162c1a8af60856..b4cf3b23da93f618f142377006d726f573e34571 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -110,7 +110,6 @@ packages=['paddle',
           'paddle.fluid.contrib',
           'paddle.fluid.contrib.decoder',
           'paddle.fluid.contrib.quantize',
-          'paddle.fluid.contrib.int8_inference',
           'paddle.fluid.contrib.reader',
           'paddle.fluid.contrib.slim',
           'paddle.fluid.contrib.slim.core',