diff --git a/python/paddle/fluid/contrib/int8_inference/__init__.py b/python/paddle/fluid/contrib/int8_inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eca2dce114b069bf9b455d77ce670d73b5047fd2 --- /dev/null +++ b/python/paddle/fluid/contrib/int8_inference/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/paddle/fluid/contrib/int8_inference/utility.py b/python/paddle/fluid/contrib/int8_inference/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..197fc5f2d261798dc17daa37c1b6d258936a8a39 --- /dev/null +++ b/python/paddle/fluid/contrib/int8_inference/utility.py @@ -0,0 +1,708 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddle.fluid.core as core +import numpy as np +import math +import os +import paddle.fluid as fluid + + +class Calibrator(object): + ''' + The calibrator class transforms the program and updates the calculated scale into it. + This is INT8 v1 calibration tool, mainly for the support of ResNet-50 and MobileNet. + ''' + # TODO(guomingz): Below op list will be updated once more INT8 op kernels are supported. + non_conv_int8_op_type = ("pool2d") + supported_int8_op_type = ("conv2d", "pool2d") + const_sign_op_type = ('pool2d', 'reshape', 'concat', 'transpose') + u8_max = 255 + s8_max = 127 + + def __init__(self, *args, **kwargs): + self.program = kwargs['program'] + self.iterations = kwargs['iterations'] + self.pretrained_model = kwargs['pretrained_model'] + self.debug = kwargs['debug'] + self.algo = kwargs['algo'] + + self._conv_input_var_name = [] + self._conv_output_var_name = [] + self._pool2d_output_var_name = [] + self._weights_var_name = [] + self._residual_input_var_name = [] + self._int8_output_var_op_index_dict = {} + self._conv_op_index = [ + index for index, value in enumerate(self.program.global_block().ops) + if value.type == 'conv2d' + ] + + self._var_max_value_map = {} + self._var_max_range = {} + self._weights_scaling_factor = {} + self._u8_output_var = [] + self._s8_output_var = [] + self._persistable_vars = [] + + def generate_sampling_program(self): + self.__init_analysis() + self.__generate_output_program() + + def generate_quantized_data(self, sampling_data): + self.__sampling(sampling_data) + self.__save_scale() + self.__update_program() + self.__update_output_program_attr() + self.__display_debug() + + def __display_debug(self): + if self.debug: + self.__dot(self._output_program) + print(self._output_program) + + def __get_max_range_by_var_name(self, program, var_name): + """ + Check the specified variable was generated from Relu layer or not. + If the variable was the output of one of the pool2d/reshape/concat + /transpose, we keep trace the ancestor of this variable; + If the variable was the output the conv op, we check it's has_relu + attr; + Otherwise, we return the Calibrator.s8 as default value. + Returns: + Return Calibrator.u8_max if the variable was generated by Relu, + otherwise it will returns Calibrator.s8 + """ + search_end_index = -1 + input_index_name = {} + output_index_name = {} + ops_type = [] + + for index, op in enumerate(program.current_block().ops): + ops_type.append(op.type) + + input_index_name[index] = op.input_arg_names + + output_index_name[index] = op.output_arg_names + if var_name in op.output_arg_names: + search_end_index = index + + # analysis + while search_end_index >= 0: + if ops_type[search_end_index] == "relu": + return Calibrator.u8_max + + input_name = input_index_name[search_end_index][0] + + for i in output_index_name.keys(): + if input_name in output_index_name[i]: + search_end_index = i + break + + if ops_type[ + search_end_index] not in Calibrator.const_sign_op_type and ops_type[ + search_end_index] != 'conv2d': + return Calibrator.s8_max + + if ops_type[search_end_index] != 'conv2d': + continue + + if program.current_block().ops[search_end_index].has_attr( + 'fuse_relu') and program.current_block().ops[ + search_end_index].attr('fuse_relu'): + return Calibrator.u8_max + else: + return Calibrator.s8_max + + return Calibrator.s8_max + + def __check_op_type_with_specified_var_as_input(self, + program, + var_name, + start_index=0): + ''' + Check whether all the type of ops that use the specified variable as the + input.If one of those op is not int8-enabled, return False. + ''' + op_type_list = [ + op.type for op in program.current_block().ops[start_index:] + if var_name in op.input_arg_names + ] + for i in op_type_list: + if not i in Calibrator.supported_int8_op_type: + return False + return True + + def __check_var_source_dt(self, var_name): + ''' + Check whether the specified variable is the output of int8 conv op or not. + If true, return the original op index. + If false, return -1 + ''' + return self._int8_output_var_op_index_dict[ + var_name] if var_name in self._int8_output_var_op_index_dict else -1 + + def __update_int8_output_var_op_index_dict(self, index, var_name=None): + ''' + Update the int8_output_variable/op_index dictionary + ''' + for k, v in self._int8_output_var_op_index_dict.items(): + if v >= index: + self._int8_output_var_op_index_dict[k] = v + 1 + if var_name: + self._int8_output_var_op_index_dict[var_name] = index + + def __update_program(self): + ''' + Update the program with the quantize/dequantize op insertion. + ''' + quantize_index, dequantize_index = self.__get_quantize_dequantize_combination( + self._output_program) + inserted_op_length = 0 + calc_max_func = self.__get_optimal_scaling_factor if self.algo == "KL" else np.max + insert_op_collection = sorted(quantize_index + dequantize_index) + + for index in insert_op_collection: + if index in quantize_index: + quantize_tmp = self._output_program.current_block().create_var( + name="quantize_{}_tmp".format(index), + dtype=core.VarDesc.VarType.UINT8) + original_out_name = self._output_program.current_block().ops[ + index + inserted_op_length - 1].output_names[0] + original_out = self._output_program.current_block().ops[ + index + inserted_op_length - 1].output(original_out_name)[0] + + op = self._output_program.current_block()._insert_op( + index=index + inserted_op_length, + type="quantize", + inputs={"Input": original_out}, + outputs={"Output": quantize_tmp}, ) + + op._set_attr("data_format", "MKLDNNLAYOUT") + op._set_attr("use_mkldnn", 1) + op._set_attr( + "Scale", self._var_max_range[original_out] / + calc_max_func(self._var_max_value_map[original_out])) + + if self.__get_max_range_by_var_name( + self._output_program, + original_out) == Calibrator.s8_max: + op._set_attr("is_negative_input", 1) + + self.__update_int8_output_var_op_index_dict( + index + inserted_op_length, "quantize_{}_tmp".format(index)) + + inserted_op_length += 1 + for op in self._output_program.current_block().ops[ + index + inserted_op_length:]: + for j in op.input_names: + if op.input(j) and op.input( + j + )[0] == original_out and op.type in Calibrator.supported_int8_op_type: + op.desc.set_input(j, + ["{}".format(quantize_tmp.name)]) + else: + start_index = index + inserted_op_length + dequantize_tmp_var = self._output_program.current_block( + ).create_var( + name="dequantize_{}_tmp".format(index + 1), + dtype="float32", ) + original_out_var = None + + for original_input in self._output_program.current_block().ops[ + start_index].input_arg_names: + index_res = self.__get_op_index_by_output_var( + self._output_program, original_input) + if index_res != -1: + original_out_var = original_input + break + + if original_out_var: + op = self._output_program.current_block()._insert_op( + index=start_index, + type="dequantize", + inputs={"Input": original_out_var}, + outputs={"Output": dequantize_tmp_var}) + op._set_attr("data_format", "MKLDNNLAYOUT") + op._set_attr("use_mkldnn", 1) + op._set_attr("Scale", self._var_max_range[original_out_var] + / calc_max_func(self._var_max_value_map[ + original_out_var])) + + for op_index in range( + start_index + 1, + len(self._output_program.current_block().ops)): + if self._output_program.current_block( + ).ops[op_index].type == "conv2d" and self._output_program.current_block( + ).ops[op_index].attr("force_fp32_output"): + continue + else: + for j in self._output_program.current_block().ops[ + op_index].input_names: + if len(self._output_program.current_block().ops[ + op_index].input(j) + ) and self._output_program.current_block( + ).ops[op_index].input(j)[ + 0] == original_out_var: + self._output_program.current_block( + ).ops[op_index].desc.set_input( + j, + ["{}".format(dequantize_tmp_var.name)]) + + inserted_op_length += 1 + + op._set_attr("data_format", "MKLDNNLAYOUT") + op._set_attr("use_mkldnn", 1) + + def __update_output_program_attr(self): + for i in self._output_program.list_vars(): + if i.name in self._persistable_vars: + i.persistable = False + os.system("rm -rf {}/{}".format(self.pretrained_model, i.name)) + + for i in self._u8_output_var: + self._output_program.current_block().var(i).desc.set_dtype( + core.VarDesc.VarType.UINT8) + + for i in self._s8_output_var: + self._output_program.current_block().var(i).desc.set_dtype( + core.VarDesc.VarType.INT8) + + @property + def sampling_program(self): + return self._output_program + + @property + def sampling_vars(self): + return self._weights_var_name + self._conv_input_var_name + self._conv_output_var_name + self._residual_input_var_name + self._pool2d_output_var_name + + def _is_close(self, a, b, rel_tol=1e-09, abs_tol=0.0): + return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) + + def __generate_output_program(self): + for i in self.program.list_vars(): + if not i.persistable and i.name in self.sampling_vars: + i.persistable = True + self._persistable_vars.append(i.name) + + self._output_program = self.program.clone() + + def __save_scale(self): + ''' + Update the convolution scale information. + ''' + func = self.__get_optimal_scaling_factor if self.algo == 'KL' else np.max + for i in self._conv_op_index[1:]: + weights_var_name = self.program.current_block().ops[i].input( + 'Filter')[0] + input_var_name = self.program.current_block().ops[i].input('Input')[ + 0] + output_var_name = self.program.current_block().ops[i].output( + 'Output')[0] + self._output_program.current_block().ops[i]._set_attr( + "Scale_weights", self._weights_scaling_factor[weights_var_name]) + + self._output_program.current_block().ops[i]._set_attr( + "Scale_in", self._var_max_range[input_var_name] / + func(self._var_max_value_map[input_var_name])) + self._output_program.current_block().ops[i]._set_attr( + "Scale_out", self._var_max_range[output_var_name] / + func(self._var_max_value_map[output_var_name])) + if self._output_program.current_block().ops[i].desc.input( + "ResidualData"): + residual_var_name = self._output_program.current_block().ops[ + i].desc.input("ResidualData")[0] + self._output_program.current_block().ops[i]._set_attr( + "Scale_in_eltwise", self._var_max_range[residual_var_name] / + func(self._var_max_value_map[residual_var_name])) + + def __sampling(self, sampling_data): + ''' + Sampling the variables data range. + ''' + for i in self.program.list_vars(): + if i.name not in self.sampling_vars: + continue + + if i.name in self._weights_var_name: + scaling_factor_per_channel = [] + data = sampling_data[i.name][0] + for j in range(data.shape[0]): + var_value = float(np.max(np.abs(data[j]))) + if not self._is_close(var_value, 0.0): + scaling_factor_per_channel.append(Calibrator.s8_max / + var_value) + else: + scaling_factor_per_channel.append(0.0) + self._weights_scaling_factor[ + i.name] = scaling_factor_per_channel + else: + if i.name in self._conv_output_var_name: + op_pos = self.__get_op_index_by_output_var(self.program, + i.name) + cur_op = self.program.current_block().ops[op_pos] + + if cur_op.has_attr('fuse_relu') and cur_op.attr( + 'fuse_relu'): + max_range = Calibrator.u8_max + self._u8_output_var.append(i.name) + else: + max_range = Calibrator.s8_max + self._s8_output_var.append(i.name) + else: + max_range = self.__get_max_range_by_var_name(self.program, + i.name) + max_value = [[np.abs(np_data)] + for np_data in sampling_data[i.name]] + + self._var_max_range[i.name] = max_range + self._var_max_value_map[i.name] = max_value + + def __check_force_fp32_attr_by_output_var(self, program, var_name): + for op in program.current_block().ops: + if op.type == "conv2d" and var_name in op.output_arg_names: + return op.attr("force_fp32_output") + return False + + def __get_op_index_by_output_var(self, program, var_name, start_index=0): + ''' + Check whether the specified input variable is the output of the + conv/pool2d op's output or not. + + Returns: + The index if the variable is the output of any conv/pool2d op's + output. + -1 when the variable is not the output of any conv/pool2d op's + output. + ''' + for index, op in enumerate(program.current_block().ops[start_index:]): + if var_name in op.output_arg_names and op.type in Calibrator.supported_int8_op_type: + return index + return -1 + + def __get_op_index_by_input_var(self, program, var_name, start_index=0): + ''' + Get the op index by specified input variable. + Returns: + The op index if the variable is the input of this op or -1 if the + variable is not the input of any op. + ''' + for index, op in enumerate(program.current_block().ops[start_index:]): + if var_name in op.input_arg_names: + return index + + return -1 + + def __get_quantize_dequantize_combination(self, program): + """ + Get the quantize/dequantize op index for further inserting. + Args: + The program desc. + Returns: + Two lists contains the quantize op and dequantize op index information. + """ + quantize_op_index = [] + dequantize_op_index = [] + minimal_conv_count = 2 # there must be two conv ops if not enable the first conv int8. + if len(self._conv_op_index) < minimal_conv_count: + return [], [] + + for index, value in enumerate(self._conv_op_index): + if index == 0: + quantize_op_index.append(self._conv_op_index[index + 1]) + elif index == len(self._conv_op_index) - 1: + output_var = program.current_block().ops[value].output( + "Output")[0] + if self.__check_op_type_with_specified_var_as_input( + program, output_var, index): + dequantize_op_index.append(self._conv_op_index[index] + 2) + else: + program.current_block().ops[value]._set_attr( + "force_fp32_output", True) + + elif self._conv_op_index[index] + 1 < self._conv_op_index[index + + 1]: + + program.current_block().ops[self._conv_op_index[ + index]]._set_attr("force_fp32_output", True) + + for op_index in range(self._conv_op_index[index + 1], + self._conv_op_index[index], -1): + op_type = program.current_block().ops[op_index].type + op_has_int8_input = False + input_var_name = None + input_length = len(program.current_block().ops[op_index] + .input_arg_names) + + for var_name in program.current_block().ops[ + op_index].input_arg_names: + if self.__check_var_source_dt(var_name) != -1: + op_has_int8_input = True + input_var_name = var_name + break + + if op_has_int8_input: + if op_type == "conv2d": + if program.current_block().ops[op_index + + 1].type == "conv2d": + continue + elif program.current_block( + ).ops[op_index + + 1].type in Calibrator.non_conv_int8_op_type: + dequantize_op_index.append(op_index + 2) + break + else: + program.current_block().ops[op_index]._set_attr( + "force_fp32_output", True) + continue + elif not self.__check_force_fp32_attr_by_output_var( + program, input_var_name + ) and op_index not in dequantize_op_index: + share_input_flag = True + for input_attr_name in program.current_block().ops[ + op_index].input_names: + input_var_name = program.current_block().ops[ + op_index].input(input_attr_name)[0] + cousin_op_index = self.__get_op_index_by_input_var( + program, input_var_name) + if cousin_op_index != -1 and cousin_op_index in dequantize_op_index: + share_input_flag = False + break + if share_input_flag: + dequantize_op_index.append(op_index) + + elif input_length: + output_is_to_int8_op = False + share_input_flag = True + for var_name in program.current_block().ops[ + op_index].input_arg_names: + if not self.__check_op_type_with_specified_var_as_input( + program, var_name): + share_input_flag = False + break + + for var_name in program.current_block().ops[ + op_index].output_arg_names: + if self.__get_op_index_by_output_var( + program, var_name, op_index) != -1: + output_is_to_int8_op = True + break + + if share_input_flag or output_is_to_int8_op: + quantize_op_index.append(op_index) + + return quantize_op_index, dequantize_op_index + + def __init_analysis(self): + ''' + Collect the variable names for sampling. + ''' + start_index = 1 #analysis the conv op detail from second conv op. + + for i in self._conv_op_index[start_index:]: + self._weights_var_name.append(self.program.current_block().ops[i] + .input('Filter')[0]) + self._conv_input_var_name.append(self.program.current_block().ops[i] + .input('Input')[0]) + self._conv_output_var_name.append(self.program.current_block().ops[ + i].output('Output')[0]) + self._int8_output_var_op_index_dict[self.program.current_block() + .ops[i].output('Output')[0]] = i + if self.program.current_block().ops[i].desc.input("ResidualData"): + self._residual_input_var_name.append(self.program.current_block( + ).ops[i].desc.input("ResidualData")[0]) + + if self.program.current_block().ops[i + 1].type == "pool2d": + self._pool2d_output_var_name.append(self.program.current_block( + ).ops[i + 1].output('Out')[0]) + + def __expand_quantized_bins(self, quantized_bins, reference_bins): + expanded_quantized_bins = [0] * len(reference_bins) + num_merged_bins = len(reference_bins) / len(quantized_bins) + j_start = 0 + j_end = num_merged_bins + for idx in xrange(len(quantized_bins)): + zero_count = reference_bins[j_start:j_end].count(0) + num_merged_bins = j_end - j_start + if zero_count == num_merged_bins: + avg_bin_ele = 0 + else: + avg_bin_ele = quantized_bins[idx] / ( + num_merged_bins - zero_count + 0.0) + for idx1 in xrange(j_start, j_end): + expanded_quantized_bins[idx1] = (0 if reference_bins[idx1] == 0 + else avg_bin_ele) + j_start += num_merged_bins + j_end += num_merged_bins + if (idx + 1) == len(quantized_bins) - 1: + j_end = len(reference_bins) + return expanded_quantized_bins + + def __safe_entropy(self, reference_distr_P, P_sum, candidate_distr_Q, + Q_sum): + ''' + Calculate the entropy. + ''' + assert len(reference_distr_P) == len(candidate_distr_Q) + tmp_sum1 = 0 + tmp_sum2 = 0 + for idx in range(len(reference_distr_P)): + p_idx = reference_distr_P[idx] + q_idx = candidate_distr_Q[idx] + if p_idx == 0: + tmp_sum1 += 0 + tmp_sum2 += 0 + else: + if q_idx == 0: + print("Fatal error!, idx = " + str(idx) + + " qindex = 0! p_idx = " + str(p_idx)) + tmp_sum1 += p_idx * (math.log(Q_sum * p_idx)) + tmp_sum2 += p_idx * (math.log(P_sum * q_idx)) + return (tmp_sum1 - tmp_sum2) / P_sum + + # Reference: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf + def __get_optimal_scaling_factor(self, + activation_blob, + num_quantized_bins=255): + ''' + Using the KL-divergenc method to get the more precise scaling factor. + ''' + max_val = np.max(activation_blob) + min_val = np.min(activation_blob) + if min_val >= 0: + hist, hist_edeges = np.histogram( + activation_blob, bins=2048, range=(min_val, max_val)) + ending_iter = 2047 + starting_iter = int(ending_iter * 0.7) + else: + th = max(abs(max_val), abs(min_val)) + hist, hist_edeges = np.histogram( + activation_blob, bins=2048, range=(-th, th)) + starting_iter = 0 + ending_iter = 2047 + if abs(max_val) > abs(min_val): + while starting_iter < ending_iter: + if hist[starting_iter] == 0: + starting_iter += 1 + continue + else: + break + starting_iter += int((ending_iter - starting_iter) * 0.6) + else: + while ending_iter > 0: + if hist[ending_iter] == 0: + ending_iter -= 1 + continue + else: + break + starting_iter = int(0.6 * ending_iter) + bin_width = hist_edeges[1] - hist_edeges[0] + P_sum = len(activation_blob) + min_kl_divergence = 0 + min_kl_index = 0 + kl_inited = False + for i in range(starting_iter, ending_iter + 1): + reference_distr_P = hist[0:i].tolist() + outliers_count = sum(hist[i:2048]) + if reference_distr_P[i - 1] == 0: + continue + reference_distr_P[i - 1] += outliers_count + reference_distr_bins = reference_distr_P[:] + candidate_distr_Q = hist[0:i].tolist() + num_merged_bins = i / num_quantized_bins + candidate_distr_Q_quantized = [0] * num_quantized_bins + j_start = 0 + j_end = num_merged_bins + for idx in xrange(num_quantized_bins): + candidate_distr_Q_quantized[idx] = sum(candidate_distr_Q[ + j_start:j_end]) + j_start += num_merged_bins + j_end += num_merged_bins + if (idx + 1) == num_quantized_bins - 1: + j_end = i + candidate_distr_Q = self.__expand_quantized_bins( + candidate_distr_Q_quantized, reference_distr_bins) + Q_sum = sum(candidate_distr_Q) + kl_divergence = self.__safe_entropy(reference_distr_P, P_sum, + candidate_distr_Q, Q_sum) + if not kl_inited: + min_kl_divergence = kl_divergence + min_kl_index = i + kl_inited = True + elif kl_divergence < min_kl_divergence: + min_kl_divergence = kl_divergence + min_kl_index = i + else: + pass + if min_kl_index == 0: + while starting_iter > 0: + if hist[starting_iter] == 0: + starting_iter -= 1 + continue + else: + break + min_kl_index = starting_iter + return (min_kl_index + 0.5) * bin_width + + @staticmethod + def __dot(program, output_name="model.dot"): + ''' + Generate the graphiz dot file for debugging. + ''' + dot_graph = "" + dot_nodes = [] + dot_edges = [] + dot_graph += "digraph pm {\n" + for block in program.blocks: + ops = list(block.ops) + for index, op in enumerate(ops): + op_type = op.type + op_name = op_type + "_" + op.output_arg_names[0].replace( + ".", "_") + "___" + str(index) + for name in op.input_arg_names: + name = name.replace(".", "_") + dot_edge = name + " -> " + op_name + if dot_edge not in dot_edges: + dot_edges.append(dot_edge) + dot_node = name + " [shape=oval, style=filled, fillcolor=yellow]" + if dot_node not in dot_nodes: + dot_nodes.append(dot_node) + + for name in op.output_arg_names: + name = name.replace(".", "_") + dot_edge = op_name + " -> " + name + if dot_edge not in dot_edges: + dot_edges.append(dot_edge) + if op_type in Calibrator.supported_int8_op_type: + if op_type == "conv2d" and op.has_attr( + 'force_fp32_output') and op.attr( + "force_fp32_output"): + dot_node = op_name + " [shape=box, style=filled, color=deeppink]" + else: + dot_node = op_name + " [shape=box, style=filled, color=greenyellow]" + elif op_type in ["quantize", "dequantize"]: + dot_node = op_name + " [shape=box, style=filled, color=gold]" + else: + dot_node = op_name + " [shape=box, style=filled, fillcolor=red]" + + if dot_node not in dot_nodes: + dot_nodes.append(dot_node) + + for dot_edge in dot_edges: + dot_graph += dot_edge + "\n" + for dot_node in dot_nodes: + dot_graph += dot_node + "\n" + dot_graph += "}" + + with open(output_name, 'w') as f: + f.write(dot_graph) diff --git a/python/paddle/fluid/contrib/tests/CMakeLists.txt b/python/paddle/fluid/contrib/tests/CMakeLists.txt index 79bec8c4ad34d682895250bc29b1fddb3a569bd4..81aee1233d1db756686d1a934b94672dc5c770fe 100644 --- a/python/paddle/fluid/contrib/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/tests/CMakeLists.txt @@ -1,6 +1,10 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +if(APPLE OR WIN32 OR NOT WITH_MKL) + list(REMOVE_ITEM TEST_OPS test_calibration) +endif() + foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/paddle/fluid/contrib/tests/test_calibration.py b/python/paddle/fluid/contrib/tests/test_calibration.py new file mode 100644 index 0000000000000000000000000000000000000000..17e4eb8b831268bed00736db2e9706aece9fdd74 --- /dev/null +++ b/python/paddle/fluid/contrib/tests/test_calibration.py @@ -0,0 +1,230 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. +import unittest +import os +import numpy as np +import time +import sys +import random +import paddle +import paddle.fluid as fluid +import argparse +import functools +import contextlib +import paddle.fluid.profiler as profiler +from PIL import Image, ImageEnhance +import math +sys.path.append('..') +import int8_inference.utility as ut + +random.seed(0) +np.random.seed(0) + +DATA_DIM = 224 + +THREAD = 1 +BUF_SIZE = 102400 + +DATA_DIR = 'data/ILSVRC2012' + +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +# TODO(guomingz): Remove duplicated code from line 45 ~ line 114 +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(sample, mode, color_jitter, rotate): + img_path = sample[0] + + img = Image.open(img_path) + + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + + if img.mode != 'RGB': + img = img.convert('RGB') + + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + + return img, sample[1] + + +def _reader_creator(file_list, + mode, + shuffle=False, + color_jitter=False, + rotate=False, + data_dir=DATA_DIR): + def reader(): + with open(file_list) as flist: + full_lines = [line.strip() for line in flist] + if shuffle: + np.random.shuffle(full_lines) + + lines = full_lines + + for line in lines: + img_path, label = line.split() + img_path = os.path.join(data_dir, img_path) + if not os.path.exists(img_path): + continue + yield img_path, int(label) + + mapper = functools.partial( + process_image, mode=mode, color_jitter=color_jitter, rotate=rotate) + + return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) + + +def val(data_dir=DATA_DIR): + file_list = os.path.join(data_dir, 'val_list.txt') + return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir) + + +class TestCalibration(unittest.TestCase): + def setUp(self): + # TODO(guomingz): Put the download process in the cmake. + # Download and unzip test data set + imagenet_dl_url = 'http://paddle-inference-dist.bj.bcebos.com/int8/calibration_test_data.tar.gz' + zip_file_name = imagenet_dl_url.split('/')[-1] + cmd = 'rm -rf data {} && mkdir data && wget {} && tar xvf {} -C data'.format( + zip_file_name, imagenet_dl_url, zip_file_name) + os.system(cmd) + # resnet50 fp32 data + resnet50_fp32_model_url = 'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz' + resnet50_zip_name = resnet50_fp32_model_url.split('/')[-1] + resnet50_unzip_folder_name = 'resnet50_fp32' + cmd = 'rm -rf {} {} && mkdir {} && wget {} && tar xvf {} -C {}'.format( + resnet50_unzip_folder_name, resnet50_zip_name, + resnet50_unzip_folder_name, resnet50_fp32_model_url, + resnet50_zip_name, resnet50_unzip_folder_name) + os.system(cmd) + + self.iterations = 100 + self.skip_batch_num = 5 + + def run_program(self, model_path, generate_int8=False, algo='direct'): + image_shape = [3, 224, 224] + os.environ['FLAGS_use_mkldnn'] = 'True' + + fluid.memory_optimize(fluid.default_main_program()) + + exe = fluid.Executor(fluid.CPUPlace()) + + [infer_program, feed_dict, + fetch_targets] = fluid.io.load_inference_model(model_path, exe) + + t = fluid.transpiler.InferenceTranspiler() + t.transpile(infer_program, fluid.CPUPlace()) + + val_reader = paddle.batch(val(), batch_size=1) + + if generate_int8: + int8_model = os.path.join(os.getcwd(), "calibration_out") + + if os.path.exists(int8_model): + os.system("rm -rf " + int8_model) + os.system("mkdir " + int8_model) + + print("Start calibration ...") + + calibrator = ut.Calibrator( + program=infer_program, + pretrained_model=model_path, + iterations=100, + debug=False, + algo=algo) + + sampling_data = {} + + calibrator.generate_sampling_program() + test_info = [] + cnt = 0 + for batch_id, data in enumerate(val_reader()): + image = np.array( + [x[0].reshape(image_shape) for x in data]).astype("float32") + label = np.array([x[1] for x in data]).astype("int64") + label = label.reshape([-1, 1]) + running_program = calibrator.sampling_program.clone( + ) if generate_int8 else infer_program.clone() + for op in running_program.current_block().ops: + if op.has_attr("use_mkldnn"): + op._set_attr("use_mkldnn", True) + + _, acc1, _ = exe.run( + running_program, + feed={feed_dict[0]: image, + feed_dict[1]: label}, + fetch_list=fetch_targets) + if generate_int8: + for i in calibrator.sampling_program.list_vars(): + if i.name in calibrator.sampling_vars: + np_data = np.array(fluid.global_scope().find_var(i.name) + .get_tensor()) + if i.name not in sampling_data: + sampling_data[i.name] = [] + sampling_data[i.name].append(np_data) + + test_info.append(np.mean(acc1) * len(data)) + cnt += len(data) + + if batch_id != self.iterations - 1: + continue + + break + + if generate_int8: + calibrator.generate_quantized_data(sampling_data) + fluid.io.save_inference_model(int8_model, feed_dict, fetch_targets, + exe, calibrator.sampling_program) + print( + "Calibration is done and the corresponding files were generated at {}". + format(os.path.abspath("calibration_out"))) + else: + return np.sum(test_info) / cnt + + def test_calibration_for_resnet50(self): + fp32_acc1 = self.run_program("resnet50_fp32/model") + self.run_program("resnet50_fp32/model", True) + int8_acc1 = self.run_program("calibration_out") + delta_value = np.abs(fp32_acc1 - int8_acc1) + self.assertLess(delta_value, 0.01) + + +if __name__ == '__main__': + unittest.main()