From 94132c3ee8e119331ab3c67016086fcbb74e4f0e Mon Sep 17 00:00:00 2001 From: liuqi Date: Mon, 25 Jun 2018 19:24:09 +0800 Subject: [PATCH] Fix converter bug and add winograd parameters selection. --- mace/python/tools/converter.py | 32 +++--- .../tools/converter_tool/base_converter.py | 15 +-- .../tools/converter_tool/transformer.py | 98 +++++++++++-------- tools/converter.py | 36 ++++--- tools/sh_commands.py | 13 +-- 5 files changed, 109 insertions(+), 85 deletions(-) diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index 55ca3c6f..d39a8da5 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -112,7 +112,7 @@ def main(unused_args): option = cvt.ConverterOption(FLAGS.transformers.split(',')) else: option = cvt.ConverterOption() - option.winograd_enabled = bool(FLAGS.winograd) + option.winograd = FLAGS.winograd input_node_names = FLAGS.input_node.split(',') input_node_shapes = FLAGS.input_shape.split(':') @@ -146,6 +146,17 @@ def main(unused_args): print("Transform model to one that can better run on device") if FLAGS.runtime == 'cpu+gpu': cpu_graph_def = copy.deepcopy(output_graph_def) + + option.device = cvt.DeviceType.GPU.value + option.data_type = parse_data_type( + FLAGS.data_type, cvt.DeviceType.GPU.value) + mace_gpu_transformer = transformer.Transformer( + option, output_graph_def) + output_graph_def = mace_gpu_transformer.run() + print "start optimize gpu memory." + memory_optimizer.optimize_gpu_memory(output_graph_def) + print "GPU memory optimization done." + option.device = cvt.DeviceType.CPU.value option.data_type = parse_data_type( FLAGS.data_type, cvt.DeviceType.CPU.value) @@ -157,17 +168,6 @@ def main(unused_args): memory_optimizer.optimize_cpu_memory(cpu_graph_def) print "CPU memory optimization done." - option.device = cvt.DeviceType.GPU.value - option.data_type = parse_data_type( - FLAGS.data_type, cvt.DeviceType.GPU.value) - option.enable_transpose_filters() - mace_gpu_transformer = transformer.Transformer( - option, output_graph_def) - output_gpu_graph_def = mace_gpu_transformer.run() - print "start optimize gpu memory." - memory_optimizer.optimize_gpu_memory(output_gpu_graph_def) - print "GPU memory optimization done." - print "Merge cpu and gpu ops together" output_graph_def.op.extend(cpu_graph_def.op) output_graph_def.mem_arena.mem_block.extend( @@ -261,11 +261,9 @@ def parse_args(): help="model tag for generated function and namespace") parser.add_argument( "--winograd", - type=str2bool, - nargs='?', - const=False, - default=False, - help="open winograd convolution or not") + type=int, + default=0, + help="Which version of winograd convolution to use. [2 | 4]") parser.add_argument( "--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0") parser.add_argument( diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 74e6a080..5b5b041a 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -158,6 +158,7 @@ class MaceKeyword(object): mace_shrink_axis_mask_str = 'shrink_axis_mask' mace_transpose_a_str = 'transpose_a' mace_transpose_b_str = 'transpose_b' + mace_op_data_type_str = 'T' class TransformerRule(Enum): @@ -182,6 +183,7 @@ class TransformerRule(Enum): SORT_BY_EXECUTION = 19 ADD_IN_OUT_TENSOR_INFO = 20 ADD_MACE_INPUT_AND_OUTPUT_NODES = 21 + UPDATE_FLOAT_OP_DATA_TYPE = 22 class ConverterInterface(object): @@ -226,7 +228,7 @@ class ConverterOption(object): self._output_nodes = {} self._data_type = mace_pb2.DT_FLOAT self._device = DeviceType.CPU.value - self._winograd_enabled = False + self._winograd = 0 if transformers: self._transformer_option = [TransformerRule[transformer] for transformer in transformers] @@ -251,6 +253,7 @@ class ConverterOption(object): TransformerRule.RESHAPE_FC_WEIGHT, TransformerRule.TRANSFORM_BUFFER_IMAGE, TransformerRule.ADD_DEVICE, + TransformerRule.UPDATE_FLOAT_OP_DATA_TYPE, TransformerRule.ADD_MACE_INPUT_AND_OUTPUT_NODES, TransformerRule.SORT_BY_EXECUTION, ] @@ -272,8 +275,8 @@ class ConverterOption(object): return self._device @property - def winograd_enabled(self): - return self._winograd_enabled + def winograd(self): + return self._winograd @property def transformer_option(self): @@ -303,9 +306,9 @@ class ConverterOption(object): def device(self, device): self._device = device - @winograd_enabled.setter - def winograd_enabled(self, winograd_enabled): - self._winograd_enabled = winograd_enabled + @winograd.setter + def winograd(self, winograd): + self._winograd = winograd def disable_transpose_filters(self): if TransformerRule.TRANSPOSE_FILTERS in self._transformer_option: diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 9fc8a346..a6d8e42c 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -31,7 +31,6 @@ from mace.python.tools.converter_tool.base_converter import TransformerRule from mace.python.tools.convert_util import mace_check OPENCL_IMAGE_MAX_SIZE = 16384 -DEFAULT_GPU_WINO_BLK_SIZE = 4 class OpenCLBufferType(enum.Enum): @@ -53,6 +52,8 @@ class Transformer(base_converter.ConverterInterface): """ def __init__(self, option, model): + # Dependencies + # (TRANSFORM_MATMUL_TO_FC, TRANSFORM_GLOBAL_CONV_TO_FC) -> RESHAPE_FC_WEIGHT # noqa self._registered_transformers = { TransformerRule.REMOVE_IDENTITY_OP: self.remove_identity_op, TransformerRule.TRANSFORM_GLOBAL_POOLING: @@ -83,6 +84,8 @@ class Transformer(base_converter.ConverterInterface): self.transform_buffer_image, TransformerRule.ADD_DEVICE: self.add_device, + TransformerRule.UPDATE_FLOAT_OP_DATA_TYPE: + self.update_float_op_data_type, TransformerRule.ADD_MACE_INPUT_AND_OUTPUT_NODES: self.add_mace_input_and_output_nodes, TransformerRule.SORT_BY_EXECUTION: self.sort_by_execution, @@ -90,7 +93,7 @@ class Transformer(base_converter.ConverterInterface): self._option = option self._model = model - self._gpu_wino_blk = DEFAULT_GPU_WINO_BLK_SIZE + self._gpu_wino_blk = self._option.winograd self._ops = {} self._consts = {} @@ -442,7 +445,7 @@ class Transformer(base_converter.ConverterInterface): return filter_height, filter_width, in_channels, out_channels def check_if_gpu_use_winograd_conv(self, op): - if not self._option.winograd_enabled: + if not self._option.winograd: return False if op.type != MaceOp.Conv2D.name: return False @@ -464,7 +467,6 @@ class Transformer(base_converter.ConverterInterface): if filter_height != 3 or filter_width != 3 or strides[0] > 1 \ or strides[1] > 1 or dilations[0] > 1 or dilations[1] > 1: return False - self._gpu_wino_blk = DEFAULT_GPU_WINO_BLK_SIZE block_size = self._gpu_wino_blk blk_sqr = (block_size + 2) * (block_size + 2) width =\ @@ -479,9 +481,9 @@ class Transformer(base_converter.ConverterInterface): width = \ batch * ((out_height + block_size - 1) / block_size) * \ ((out_width + block_size - 1) / block_size) - return (blk_sqr * in_channels <= OPENCL_IMAGE_MAX_SIZE) and \ - (blk_sqr * out_channels <= OPENCL_IMAGE_MAX_SIZE) and \ - (width <= OPENCL_IMAGE_MAX_SIZE) + return (blk_sqr * in_channels < OPENCL_IMAGE_MAX_SIZE) and \ + (blk_sqr * out_channels < OPENCL_IMAGE_MAX_SIZE) and \ + (width < OPENCL_IMAGE_MAX_SIZE) def transform_gpu_winograd(self): """Only gpu needs winograd transform.""" @@ -577,17 +579,6 @@ class Transformer(base_converter.ConverterInterface): blk_size_arg.i = block_size ConverterUtil.add_data_format_arg(iwt_op, data_format) - filter_data = np.array(filter.float_data).reshape( - filter.dims) - - weight_tensor_value = filter_data - if filter_format == FilterFormat.HWIO: - weight_tensor_value = filter_data.transpose(3, 2, 0, 1) - elif filter_format == FilterFormat.HWOI: - weight_tensor_value = filter_data.transpose(2, 3, 0, 1) - filter.float_data[:] = weight_tensor_value.flat[:] - filter.dims[:] = weight_tensor_value.shape[:] - self.safe_remove_node(op, iwt_op) return False @@ -608,12 +599,13 @@ class Transformer(base_converter.ConverterInterface): def fold_biasadd(self): net = self._model for op in net.op: - if ((op.type == MaceOp.Conv2D.name - or op.type == MaceOp.Deconv2D.name - or op.type == MaceOp.DepthwiseConv2d.name - or op.type == MaceOp.FullyConnected.name - or op.type == MaceOp.WinogradInverseTransform.name) - and len(op.input) == 2) \ + if (((op.type == MaceOp.Conv2D.name + or op.type == MaceOp.Deconv2D.name + or op.type == MaceOp.DepthwiseConv2d.name + or op.type == MaceOp.FullyConnected.name) + and len(op.input) == 2) + or (op.type == MaceOp.WinogradInverseTransform.name + and len(op.input) == 1)) \ and len(self._consumers.get(op.output[0], [])) == 1: consumer_op = self._consumers[op.output[0]][0] if consumer_op.type == MaceOp.BiasAdd.name: @@ -893,25 +885,24 @@ class Transformer(base_converter.ConverterInterface): if op.type == MaceOp.Conv2D.name \ or op.type == MaceOp.Deconv2D.name \ or op.type == MaceOp.DepthwiseConv2d.name: - if ConverterUtil.get_arg( - op, MaceKeyword.mace_winograd_filter_transformed) \ - is None: - filter = self._consts[op.input[1]] - filter_data = np.array(filter.float_data).reshape( - filter.dims) - if op.type == MaceOp.Deconv2D.name: - filter_data = filter_data.transpose(2, 3, 0, 1) - else: - filter_data = filter_data.transpose(3, 2, 0, 1) - filter.float_data[:] = filter_data.flat - filter.dims[:] = filter_data.shape - if op.type == MaceOp.FullyConnected.name: - weight = self._consts[op.input[1]] - weight_data = np.array(weight.float_data).reshape( - weight.dims) - weight_data = weight_data.transpose(1, 0) - weight.float_data[:] = weight_data.flat - weight.dims[:] = weight_data.shape + filter = self._consts[op.input[1]] + filter_data = np.array(filter.float_data).reshape( + filter.dims) + if op.type == MaceOp.Deconv2D.name: + filter_data = filter_data.transpose(2, 3, 0, 1) + else: + filter_data = filter_data.transpose(3, 2, 0, 1) + filter.float_data[:] = filter_data.flat + filter.dims[:] = filter_data.shape + if (op.type == MaceOp.MatMul.name and + ConverterUtil.get_arg(op, MaceKeyword.mace_winograd_filter_transformed) is not None): # noqa + filter = self._consts[op.input[0]] + filter_data = np.array(filter.float_data).reshape( + filter.dims) + filter_data = filter_data.transpose(3, 2, 0, 1) + filter.float_data[:] = filter_data.flat + filter.dims[:] = filter_data.shape + self.set_filter_format(FilterFormat.OIHW) return False @@ -1104,6 +1095,11 @@ class Transformer(base_converter.ConverterInterface): weight = self._consts[op.input[1]] if len(weight.dims) == 2: op.type = MaceOp.FullyConnected.name + weight_data = np.array(weight.float_data).reshape( + weight.dims) + weight_data = weight_data.transpose(1, 0) + weight.float_data[:] = weight_data.flat + weight.dims[:] = weight_data.shape return False @@ -1156,6 +1152,22 @@ class Transformer(base_converter.ConverterInterface): return False + def update_float_op_data_type(self): + print("update op with float data type") + net = self._model + for op in net.op: + data_type_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_op_data_type_str) + if not data_type_arg: + data_type_arg = op.arg.add() + data_type_arg.name = MaceKeyword.mace_op_data_type_str + data_type_arg.i = self._option.data_type + elif data_type_arg.i != self._option.data_type \ + and data_type_arg.i == mace_pb2.DT_FLOAT: + data_type_arg.i = self._option.data_type + + return False + def sort_dfs(self, op, visited, sorted_nodes): visited.update([op.name]) if len(op.input) > 0: diff --git a/tools/converter.py b/tools/converter.py index 19837129..ea212ec9 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -112,6 +112,8 @@ DSPDataTypeStrs = [ DSPDataType = Enum('DSPDataType', [(ele, ele) for ele in DSPDataTypeStrs], type=str) +WinogradParameters = [0, 2, 4] + class DefaultValues(object): omp_num_threads = -1, @@ -408,6 +410,12 @@ def format_model_config(flags): else: subgraph[YAMLKeyword.validation_inputs_data] = \ validation_inputs_data + input_ranges = subgraph.get( + YAMLKeyword.input_ranges, []) + if not isinstance(input_ranges, list): + subgraph[YAMLKeyword.input_ranges] = [input_ranges] + else: + subgraph[YAMLKeyword.input_ranges] = input_ranges for key in [YAMLKeyword.limit_opencl_kernel_time, YAMLKeyword.nnlib_graph_mode, @@ -417,6 +425,12 @@ def format_model_config(flags): if value == "": model_config[key] = 0 + mace_check(model_config[YAMLKeyword.winograd] in WinogradParameters, + ModuleName.YAML_CONFIG, + "'winograd' parameters must be in " + + str(WinogradParameters) + + ". 0 for disable winograd convolution") + weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") model_config[YAMLKeyword.weight_file_path] = weight_file_path @@ -511,7 +525,7 @@ def print_configuration(flags, configs): configs[YAMLKeyword.embed_model_data]]) data.append([YAMLKeyword.linkshared, configs[YAMLKeyword.linkshared]]) - data.append(["Tuning", flags.tuning]) + data.append(["Tuning", flags.disable_tuning]) MaceLogger.summary(StringFormatter.table(header, data, title)) @@ -736,7 +750,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0].get(YAMLKeyword.input_ranges, None)) + input_ranges=subgraphs[0][YAMLKeyword.input_ranges]) device_type = parse_device_type(RuntimeType.gpu) sh_commands.tuning_run( @@ -869,8 +883,8 @@ def build_library(flags): convert_model(configs) - generate_library(configs, flags.tuning, - flags.enable_openmp, flags.address_sanitizer) + generate_library(configs, flags.disable_tuning, + flags.disable_openmp, flags.address_sanitizer) print_library_summary(configs) @@ -980,7 +994,7 @@ def run_specific_target(flags, configs, target_abi, subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0].get(YAMLKeyword.input_ranges, None)) + input_ranges=subgraphs[0][YAMLKeyword.input_ranges]) runtime_list = [] if target_abi == ABIType.host: runtime_list.extend([RuntimeType.cpu]) @@ -1129,7 +1143,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): subgraphs[0][YAMLKeyword.input_tensors], subgraphs[0][YAMLKeyword.input_shapes], subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0].get(YAMLKeyword.input_ranges, None)) + input_ranges=subgraphs[0][YAMLKeyword.input_ranges]) runtime_list = [] if target_abi == ABIType.host: runtime_list.extend([RuntimeType.cpu]) @@ -1262,13 +1276,13 @@ def parse_args(): help='build model library and test tools') build.set_defaults(func=build_library) build.add_argument( - '--tuning', - action="store_true", - help="whether tuning the parameters for the GPU of specified SoC.") + '--disable_tuning', + action="store_false", + help="Disable tuning the parameters for the GPU of specified SoC.") build.add_argument( - "--enable_openmp", + "--disable_openmp", action="store_false", - help="Enable openmp for multiple thread.") + help="Disable openmp for multiple thread.") run = subparsers.add_parser( 'run', parents=[all_type_parent_parser, run_bm_parent_parser, diff --git a/tools/sh_commands.py b/tools/sh_commands.py index f96ad7de..3e231ca8 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -486,7 +486,7 @@ def gen_model_code(model_codegen_dir, input_shapes, dsp_mode, embed_model_data, - fast_conv, + winograd, obfuscate, model_build_type, data_type, @@ -512,7 +512,7 @@ def gen_model_code(model_codegen_dir, "--input_shape=%s" % input_shapes, "--dsp_mode=%s" % dsp_mode, "--embed_model_data=%s" % embed_model_data, - "--winograd=%s" % fast_conv, + "--winograd=%s" % winograd, "--obfuscate=%s" % obfuscate, "--output_dir=%s" % model_codegen_dir, "--model_build_type=%s" % model_build_type, @@ -525,8 +525,8 @@ def gen_random_input(model_output_dir, input_nodes, input_shapes, input_files, - input_file_name="model_input", - input_ranges=None): + input_ranges, + input_file_name="model_input"): for input_name in input_nodes: formatted_name = common.formatted_file_name( input_file_name, input_name) @@ -534,10 +534,7 @@ def gen_random_input(model_output_dir, sh.rm("%s/%s" % (model_output_dir, formatted_name)) input_nodes_str = ",".join(input_nodes) input_shapes_str = ":".join(input_shapes) - if input_ranges: - input_ranges_str = ":".join(input_ranges) - else: - input_ranges_str = None + input_ranges_str = ":".join(input_ranges) generate_input_data("%s/%s" % (model_output_dir, input_file_name), input_nodes_str, input_shapes_str, -- GitLab