diff --git a/calibration.py b/calibration.py
deleted file mode 100644
index d08914172d5a2b3f1f9e3d313aef7c29e6bd782f..0000000000000000000000000000000000000000
--- a/calibration.py
+++ /dev/null
@@ -1,394 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import print_function
-import os
-import numpy as np
-import time
-import sys
-import paddle
-import paddle.fluid as fluid
-import models
-import reader
-import argparse
-import functools
-from models.learning_rate import cosine_decay
-from utility import add_arguments, print_arguments
-import math
-import paddle.fluid.core as core
-
-parser = argparse.ArgumentParser(description=__doc__)
-add_arg = functools.partial(add_arguments, argparser=parser)
-# yapf: disable
-add_arg('batch_size',       int,  32,                    "Minibatch size.")
-add_arg('use_gpu',          bool, True,                  "Whether to use GPU or not.")
-add_arg('class_dim',        int,  1000,                  "Class number.")
-add_arg('image_shape',      str,  "3,224,224",           "Input image size")
-add_arg('with_mem_opt',     bool, True,                  "Whether to use memory optimization or not.")
-add_arg('pretrained_model', str,  None,                  "Whether to use pretrained model.")
-add_arg('model',            str,  "SE_ResNeXt50_32x4d",  "Set the network to use.")
-# yapf: enable
-
-model_list = [m for m in dir(models) if "__" not in m]
-
-DEBUG = 1
-
-def dot(program):
-    dot_graph = ""
-    dot_nodes = []
-    dot_edges = []
-    dot_graph += "digraph pm {\n"
-    for block in program.blocks:
-        ops = list(block.ops)
-        block_id = block.idx
-        for op in ops:
-            op_type = op.type
-            op_name = op_type + "_" + op.input_arg_names[0].replace(".", "_")
-            for name in op.input_arg_names:
-                name = name.replace(".", "_")
-                dot_edge = name + " -> " + op_name
-                if dot_edge not in dot_edges:
-                    dot_edges.append(dot_edge)
-                dot_node = name + " [shape=oval]"
-                if dot_node not in dot_nodes:
-                    dot_nodes.append(dot_node)
-
-            for name in op.output_arg_names:
-                name = name.replace(".", "_")
-                dot_edge = op_name + " -> " + name
-                if dot_edge not in dot_edges:
-                    dot_edges.append(dot_edge)
-
-                dot_node = op_name + " [shape=box]"
-                if dot_node not in dot_nodes:
-                    dot_nodes.append(dot_node)
-
-    for dot_edge in dot_edges:
-        dot_graph += dot_edge + "\n"
-    for dot_node in dot_nodes:
-        dot_graph += dot_node + "\n"
-    dot_graph += "}"
-
-    file = open("model.dot", 'w')
-    file.write(dot_graph)
-    file.close()
-
-def get_quantization_op_pos(program):
-    conv_op_index = [index for index, value in enumerate(program.global_block().ops) if value.type == 'conv2d']
-    if len(conv_op_index) < 2:
-        return None
-    return [conv_op_index[1]]
-
-def get_dequantization_op_pos(program):
-    conv_op_index = [index for index, value in enumerate(program.global_block().ops) if value.type == 'conv2d']
-    if len(conv_op_index) < 2:
-        return None
-    res = []
-    support_int8_op_type = ["pool2d"]
-
-    for index, value in enumerate(conv_op_index[:-1]):
-        if index == 0: continue
-
-        if value + 1 == conv_op_index[index + 1]:
-            continue
-        else:
-            start_index = index + 1
-            end_index = conv_op_index[index + 1]
-            while start_index < end_index:
-                if program.global_block().ops[start_index].type not in support_int8_op_type:
-                    print program.global_block().ops[start_index].type, end_index
-                    res.append(start_index)
-                    break
-                else:
-                    start_index += 1
-    last_dequantize_op_index = conv_op_index[-1]
-    # skip pooling op which is the Successor of the last conv op
-    while program.global_block().ops[last_dequantize_op_index + 1].type in support_int8_op_type:
-        last_dequantize_op_index += 1
-    res.append(last_dequantize_op_index) # need to fix
-    return res
-
-
-def get_requantization_op_pos(program):
-    pass
-
-# def create_op(program, op_name, data_type):
-def update_program_for_saving_var(program, name, value, data_shape, dst, data_type="float32"):
-    tmp_var = program.current_block().create_var(
-        name=name,
-        dtype=data_type,
-        persistable=True,
-    )
-
-    program.current_block().append_op(
-        type='assign_value',
-        outputs={'Out': [tmp_var]},
-        attrs={
-            'dtype': core.VarDesc.VarType.FP32,
-            'shape': data_shape,
-            'fp32_values': value
-        }
-    )
-
-    program.current_block().append_op(
-        type='save',
-        inputs={'X': '{}'.format(name)},
-        outputs={},
-        attrs={"file_path": "{}/{}".format(dst, name)}
-    )
-
-
-def eval(args):
-    # parameters from arguments
-    class_dim = args.class_dim
-    model_name = args.model
-    pretrained_model = args.pretrained_model
-    with_memory_optimization = args.with_mem_opt
-    image_shape = [int(m) for m in args.image_shape.split(",")]
-
-    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
-                                                                     model_list)
-
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
-    # model definition
-    model = models.__dict__[model_name]()
-
-    if model_name is "GoogleNet":
-        out0, out1, out2 = model.net(input=image, class_dim=class_dim)
-        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
-        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
-        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
-        avg_cost0 = fluid.layers.mean(x=cost0)
-        avg_cost1 = fluid.layers.mean(x=cost1)
-        avg_cost2 = fluid.layers.mean(x=cost2)
-
-        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
-        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
-    else:
-        out = model.net(input=image, class_dim=class_dim)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-
-        avg_cost = fluid.layers.mean(x=cost)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-
-    test_program = fluid.default_main_program().clone(for_test=True)
-
-    if with_memory_optimization:
-        fluid.memory_optimize(fluid.default_main_program())
-
-    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-
-    if pretrained_model:
-        def if_exist(var):
-            return os.path.exists(os.path.join(pretrained_model, var.name))
-
-        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
-
-    t = fluid.transpiler.InferenceTranspiler()
-    t.transpile(test_program, fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace())
-
-    conv_op_index = [index for index, value in enumerate(test_program.global_block().ops) if value.type == 'conv2d']
-    weights_var_name = []
-    conv_input_var_name = []
-    conv_output_var_name = []
-
-    for i in conv_op_index[1:]:
-        weights_var_name.append(test_program.current_block().ops[i].input('Filter')[0])
-        conv_input_var_name.append(test_program.current_block().ops[i].input('Input')[0])
-        conv_output_var_name.append(test_program.current_block().ops[i].output('Output')[0])
-
-    not_persistable_vars = (i for i in test_program.list_vars() if not i.persistable)
-    back_program = test_program.clone()
-    for i in not_persistable_vars:
-        i.persistable = True
-
-    var_name = [i.name for i in test_program.list_vars()]
-
-    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
-    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
-
-    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
-
-    test_info = [[], [], []]
-    cnt = 0
-    var_max = {}
-    for batch_id, data in enumerate(val_reader()):
-        t1 = time.time()
-        loss, acc1, acc5 = exe.run(test_program,
-                                   fetch_list=fetch_list,
-                                   feed=feeder.feed(data))
-        for i in var_name:
-            # print (np.array(fluid.global_scope().find_var(i).get_tensor()).shape)
-            np_data = np.array(fluid.global_scope().find_var(i).get_tensor())
-
-            if i in weights_var_name:
-                max_value = [float(np.amax(np_data[j])) for j in range(np_data.shape[0])]
-            else:
-                max_value = [float(np.amax(np_data))]
-            var_max[i] = []
-            var_max[i].append(max_value)
-
-        t2 = time.time()
-        period = t2 - t1
-        loss = np.mean(loss)
-        acc1 = np.mean(acc1)
-        acc5 = np.mean(acc5)
-        test_info[0].append(loss * len(data))
-        test_info[1].append(acc1 * len(data))
-        test_info[2].append(acc5 * len(data))
-        cnt += len(data)
-        if batch_id % 10 == 0:
-            print("Testbatch {0},loss {1}, "
-                  "acc1 {2},acc5 {3},time {4}".format(batch_id, \
-                  loss, acc1, acc5, \
-                  "%2.2f sec" % period))
-            sys.stdout.flush()
-
-        break
-
-    test_loss = np.sum(test_info[0]) / cnt
-    test_acc1 = np.sum(test_info[1]) / cnt
-    test_acc5 = np.sum(test_info[2]) / cnt
-
-    print("Test_loss {0}, test_acc1 {1}, test_acc5 {2}".format(
-        test_loss, test_acc1, test_acc5))
-    sys.stdout.flush()
-
-    infer_prog = test_program.clone()
-
-    for i in conv_input_var_name:
-        update_program_for_saving_var(infer_prog, i+"_scale.input.test", var_max[i][0], np.array(var_max[i]).shape, pretrained_model)
-
-    for i in conv_output_var_name:
-        update_program_for_saving_var(infer_prog, i+"_scale.output.test", var_max[i][0], np.array(var_max[i]).shape, pretrained_model)
-
-    for i in weights_var_name:
-        update_program_for_saving_var(infer_prog, i+"_scale.weights.test", var_max[i][0], np.array(var_max[i]).shape, pretrained_model)
-    # update_program_for_saving_var(infer_prog, 'conv2_int8_tmp', var_max[var_name[1]][0], [1,], pretrained_model)
-
-    #Step 2 save all variable
-    for batch_id, data in enumerate(val_reader()):
-        loss, acc1, acc5 = exe.run(infer_prog,
-                                   fetch_list=fetch_list,
-                                   feed=feeder.feed(data))
-        break
-
-    int8_prog = back_program.clone()
-
-    # for index, value in enumerate(conv_op_index[1:]):
-    #     # print index,conv_input_var_name[index], ["{}_scale.input.test".format(conv_input_var_name[index])]
-    #     int8_prog.current_block().ops[value].desc.set_input("Scale_in", ["{}_scale.input.test".format(conv_input_var_name[index])])
-    #     int8_prog.current_block().ops[value].desc.set_input("Scale_out", ["{}_scale.output.test".format(conv_output_var_name[index])])
-    #     int8_prog.current_block().ops[value].desc.set_input("Scale_weights", ["{}_scale.weights.test".format(weights_var_name[index])])
-    #     if int8_prog.current_block().ops[value].desc.input("ResidualData"):
-    #         name = int8_prog.current_block().ops[value].desc.input("ResidualData")[0]
-    #         int8_prog.current_block().ops[value].desc.set_input("Scale_in_eltwise", ["{}_scale.output.test".format(name)])
-
-
-    quantize_pos = get_quantization_op_pos(int8_prog)
-
-    conv2_quantize_tmp = int8_prog.current_block().create_var(
-        name="conv2_quantize_tmp",
-        dtype=core.VarDesc.VarType.UINT8,
-        # persistable=True,
-        # lod_level= 0,
-        # shape= shape
-    )
-
-    op = int8_prog.current_block()._insert_op(
-        index=quantize_pos[0],
-
-        type="quantize",
-
-        inputs={"Input": int8_prog.current_block().ops[quantize_pos[0] - 1].output('Out')[0],
-                "Scale": "{}_scale.input.test".format(conv_input_var_name[1])},
-
-        outputs={"Output": conv2_quantize_tmp},
-
-    )
-    op._set_attr("data_format", "NCHW")
-    op._set_attr("use_mkldnn", 1)
-
-    # int8_prog.current_block().ops[quantize_pos[0] + 1 ].desc.set_input("Input", ["conv2_quantize_tmp"])
-    # for i in int8_prog.current_block().ops[quantize_pos[0] + 2:]:
-    #     if i.type == 'conv2d' and i.input('Input')[0] == int8_prog.current_block().ops[quantize_pos[0] + 1].output('Out')[0]:
-    #         i.desc.set_input("Input", ["conv2_quantize_tmp"])
-
-    # dequantize_pos = get_dequantization_op_pos(int8_prog)
-    # dequantize_tmp_var = int8_prog.current_block().create_var(
-    #     name="dequantize_tmp_var",
-    #     dtype="float32",
-    #     persistable=True,
-    #     #shape= (np.array(fluid.global_scope().find_var('pool2d_0.tmp_0').get_tensor())).shape
-    # )
-
-    # op = int8_prog.current_block()._insert_op(
-    #     index=dequantize_pos[0] + 1,
-
-    #     type= "dequantize",
-
-    #     inputs={"Input": int8_prog.current_block().ops[dequantize_pos[0]].output('Out')[0],
-    #             "Scale": "{}_scale.output.test".format( int8_prog.current_block().ops[dequantize_pos[0]].output('Out')[0])},
-
-    #     outputs={"Output": dequantize_tmp_var},
-    # )
-
-    # int8_prog.current_block().ops[dequantize_pos[0] + 2].desc.set_input("X", ["dequantize_tmp_var"])
-
-    #Step 3 Save the new model
-    # print int8_prog
-    # for i in int8_prog.current_block().ops:
-    #     print '********'
-    #     print i
-    #     if i.type == 'conv2d':
-    #         print i
-    #         # print i.input_names;
-    #         print '----'
-    #         print i.type
-    #         for j in i.input_names:
-    #             print j, i.input(j)[0] if i.input(j) else ' '
-    #         for k in i.output_names:
-    #             print k, i.output(k)[0]
-    # print conv_op_index
-    # print dequantize_pos
-    # sys.exit(0)
-    # if DEBUG:
-    #     dot(int8_prog)
-    # for i in int8_prog.current_block().ops:
-    #     print i
-    print int8_prog
-    for batch_id, data in enumerate(val_reader()):
-        loss, acc1, acc5 = exe.run(int8_prog,
-                                   fetch_list=fetch_list,
-                                   feed=feeder.feed(data))
-        loss = np.mean(loss)
-        acc1 = np.mean(acc1)
-        acc5 = np.mean(acc5)
-        test_info[0].append(loss * len(data))
-        test_info[1].append(acc1 * len(data))
-        test_info[2].append(acc5 * len(data))
-        cnt += len(data)
-        if batch_id % 10 == 0:
-            print("Testbatch {0},loss {1}, "
-                  "acc1 {2},acc5 {3}".format(batch_id, \
-                  loss, acc1, acc5))
-            sys.stdout.flush()
-        break
-    with open("__model_quantized__", "wb") as f:
-        f.write(int8_prog.desc.serialize_to_string())
-
-
-def main():
-    args = parser.parse_args()
-    print_arguments(args)
-    eval(args)
-
-
-if __name__ == '__main__':
-    main()