# Copyright 2018 The MACE Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import numpy as np from enum import Enum from operator import mul from functools import reduce from py_proto import mace_pb2 from transform import base_converter from transform.base_converter import ConverterUtil from transform.base_converter import DeviceType from transform.base_converter import EltwiseType from transform.base_converter import MaceKeyword from transform.base_converter import MaceOp from transform.base_converter import PaddingMode from transform.base_converter import PoolingType from transform.base_converter import ReduceType from utils.util import mace_check HexagonSupportedOps = [ 'BatchToSpaceND_8', 'DepthToSpace_8', 'DepthwiseSupernode_8x8p32to8', 'DequantizeOUTPUT_8tof', 'INPUT', 'OUTPUT', 'QuantizedAdd_8p8to8', 'QuantizedAvgPool_8', 'QuantizedConcat_8', 'QuantizedMaxPool_8', 'QuantizedMul_8x8to8', 'QuantizedResizeBilinear_8', 'QuantizedSoftmax_8', 'QuantizedSub_8p8to8', 'QuantizeINPUT_f_to_8', 'SpaceToBatchND_8', 'SpaceToDepth_8', 'Supernode_8x8p32to8', 'Nop', ] HexagonOp = Enum('HexagonOp', [(op, op) for op in HexagonSupportedOps], type=str) class HexagonOps(object): def __init__(self): self.hexagon_ops = { MaceOp.BatchToSpaceND.name: HexagonOp.BatchToSpaceND_8.name, MaceOp.DepthToSpace.name: HexagonOp.DepthToSpace_8.name, MaceOp.Concat.name: HexagonOp.QuantizedConcat_8.name, MaceOp.Conv2D.name: HexagonOp.Supernode_8x8p32to8.name, MaceOp.DepthwiseConv2d.name: HexagonOp.DepthwiseSupernode_8x8p32to8.name, MaceOp.Dequantize.name: HexagonOp.DequantizeOUTPUT_8tof.name, MaceOp.Eltwise.name: [HexagonOp.QuantizedAdd_8p8to8.name, HexagonOp.QuantizedSub_8p8to8.name, HexagonOp.QuantizedMul_8x8to8.name], MaceOp.Identity.name: HexagonOp.Nop.name, MaceOp.Quantize.name: HexagonOp.QuantizeINPUT_f_to_8.name, MaceOp.Pooling.name: [HexagonOp.QuantizedAvgPool_8.name, HexagonOp.QuantizedMaxPool_8.name], MaceOp.Reduce.name: HexagonOp.QuantizedAvgPool_8.name, MaceOp.ResizeBilinear.name: HexagonOp.QuantizedResizeBilinear_8.name, MaceOp.SpaceToBatchND.name: HexagonOp.SpaceToBatchND_8.name, MaceOp.SpaceToDepth.name: HexagonOp.SpaceToDepth_8.name, MaceOp.Softmax.name: HexagonOp.QuantizedSoftmax_8.name, } def has_op(self, tf_op): return tf_op in self.hexagon_ops def map_nn_op(self, tf_op): if tf_op not in self.hexagon_ops: raise Exception('Could not map nn op for: ', tf_op) return self.hexagon_ops[tf_op] padding_mode = { PaddingMode.NA: 0, PaddingMode.SAME: 1, PaddingMode.VALID: 2 } def get_tensor_name_from_op(op_name, port): return op_name + ':' + str(port) def get_op_and_port_from_tensor(tensor_name): if ':' in tensor_name: op, port = tensor_name.split(':') port = int(port) else: op = tensor_name port = 0 return op, port def normalize_name(name): return name.replace(':', '_') class HexagonConverter(base_converter.ConverterInterface): def __init__(self, option, model, quantize_activation_info): self._option = option self._model = model self._hexagon_ops = HexagonOps() self._consts = {} self._quantize_activation_info = quantize_activation_info def run(self): if self._option.device == DeviceType.HTA.value: mace_check(len(self._option.input_nodes) == 1 and len(self._option.output_nodes) == 1, 'hta only support single input and output') for tensor in self._model.tensors: self._consts[tensor.name] = tensor # convert op node self.convert_ops() self.convert_input_output_node() self.add_node_id() return self._model def add_port_for_tensors(self, tensors): for i in range(len(tensors)): if ':' not in tensors[i]: node_name = tensors[i] tensors[i] += ':0' if node_name in self._quantize_activation_info: self._quantize_activation_info[tensors[i]] = \ self._quantize_activation_info[node_name] def convert_ops(self): print("Convert mace graph to hexagon.") for op in self._model.op: if not self._hexagon_ops.has_op(op.type): raise Exception('Unsupported op: ', op) self.add_port_for_tensors(op.input) self.add_port_for_tensors(op.output) if op.type == MaceOp.Conv2D.name \ or op.type == MaceOp.DepthwiseConv2d.name: channels = op.output_shape[0].dims[3] if len(op.input) < 3: print('Supernode requires biasadd, we add it.') bias_data = np.zeros(channels, dtype=int) bias_tensor = self._model.tensors.add() bias_tensor.data_type = mace_pb2.DT_INT32 bias_tensor.dims.extend([channels]) bias_tensor.int32_data.extend(bias_data) bias_tensor.minval = 0 bias_tensor.maxval = 0 bias_tensor.name = op.name + "/bias:0" bias = bias_tensor.name self._consts[bias] = bias_tensor else: bias = op.input.pop() self.add_min_max_const_node(op, op.input[0]) self.add_min_max_const_node(op, op.input[1]) strides_arg = ConverterUtil.get_arg(op, 'strides') mace_check(strides_arg is not None, "Missing strides of Conv or Depthwise Conv.") strides = self.add_shape_const_node( op, [1, strides_arg.ints[0], strides_arg.ints[1], 1], MaceKeyword.mace_strides_str) op.input.extend([strides, bias]) self.add_min_max_const_node(op, bias) self.add_min_max_const_node( op, op.output[0], True, True, False) elif op.type == MaceOp.Eltwise.name: self.add_min_max_const_node(op, op.input[0]) self.add_min_max_const_node(op, op.input[1]) element_type = \ ConverterUtil.get_arg(op, MaceKeyword.mace_element_type_str).i if element_type == EltwiseType.SUM.value \ or element_type == EltwiseType.SUB.value: self.add_min_max_const_node( op, op.output[0], True, True, False) elif op.type == MaceOp.BatchToSpaceND.name \ or op.type == MaceOp.SpaceToBatchND.name: strides_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_space_batch_block_shape_str) strides_tensor = self._model.tensors.add() strides_tensor.name = op.name + '/strides:0' strides_tensor.data_type = mace_pb2.DT_INT32 strides_tensor.dims.extend([1, 1, 1, len(strides_arg.ints)]) strides_tensor.int32_data.extend(strides_arg.ints) if op.type == MaceOp.BatchToSpaceND.name: pad_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_batch_to_space_crops_str) else: pad_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_paddings_str) pad_tensor = self._model.tensors.add() pad_tensor.name = op.name + '/pad:0' pad_tensor.data_type = mace_pb2.DT_INT32 pad_tensor.dims.extend([1, 1, len(pad_arg.ints) // 2, 2]) pad_tensor.int32_data.extend(pad_arg.ints) op.input.extend([strides_tensor.name, pad_tensor.name]) self.add_min_max_const_node(op, op.input[0]) elif op.type == MaceOp.DepthToSpace.name \ or op.type == MaceOp.SpaceToDepth.name: size_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_space_depth_block_size_str) size_tensor = self._model.tensors.add() size_tensor.name = op.name + '/block_size:0' size_tensor.data_type = mace_pb2.DT_INT32 size_tensor.dims.extend([1]) size_tensor.int32_data.extend([size_arg.i]) op.input.extend([size_tensor.name]) self.add_min_max_const_node(op, op.input[0]) elif op.type == MaceOp.Pooling.name: self.add_min_max_const_node(op, op.input[0]) window_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_kernel_str) window_tensor = self._model.tensors.add() window_tensor.name = op.name + '/window:0' window_tensor.data_type = mace_pb2.DT_INT32 window_tensor.dims.extend( [1, window_arg.ints[0], window_arg.ints[1], 1]) strides_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_strides_str) strides_tensor = self._model.tensors.add() strides_tensor.name = op.name + '/strides:0' strides_tensor.data_type = mace_pb2.DT_INT32 strides_tensor.dims.extend( [1, strides_arg.ints[0], strides_arg.ints[1], 1]) op.input.extend([window_tensor.name, strides_tensor.name]) elif op.type == MaceOp.Reduce.name: self.add_min_max_const_node(op, op.input[0]) reduce_type_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_reduce_type_str) mace_check(reduce_type_arg.i == ReduceType.MEAN.value, "Hexagon Reduce only supports Mean now.") keep_dims_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_keepdims_str) mace_check(keep_dims_arg.i == 1, "Hexagon Reduce Mean only supports keep dims now.") axis_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str) mace_check(1 <= len(axis_arg.ints) <= 2, "Hexagon Reduce Mean only supports spatial now.") for i in axis_arg.ints: mace_check(1 <= i <= 2, "Hexagon Reduce Mean only supports spatial now") producer_op_name, _ = get_op_and_port_from_tensor(op.input[0]) input_dims = None for producer_op in self._model.op: if producer_op.name == producer_op_name: input_dims = producer_op.output_shape[0].dims break mace_check(input_dims is not None, "Missing input shape.") window_tensor = self._model.tensors.add() window_tensor.name = op.name + '/window:0' window_tensor.data_type = mace_pb2.DT_INT32 if len(axis_arg.ints) == 1: dim1, dim2 = (input_dims[1], 1) \ if axis_arg.ints[0] == 1 else (1, input_dims[2]) else: dim1, dim2 = input_dims[1], input_dims[2] window_tensor.dims.extend([1, dim1, dim2, 1]) strides_tensor = self._model.tensors.add() strides_tensor.name = op.name + '/strides:0' strides_tensor.data_type = mace_pb2.DT_INT32 strides_tensor.dims.extend([1, dim1, dim2, 1]) op.input.extend([window_tensor.name, strides_tensor.name]) elif op.type == MaceOp.ResizeBilinear.name: newdim_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_resize_size_str) newdim_tensor = self._model.tensors.add() newdim_tensor.name = op.name + '/newdim:0' newdim_tensor.data_type = mace_pb2.DT_INT32 newdim_tensor.dims.extend([len(newdim_arg.ints)]) newdim_tensor.int32_data.extend(newdim_arg.ints) op.input.extend([newdim_tensor.name]) self.add_min_max_const_node(op, op.input[0]) align_corners_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_align_corners_str) align_corners_tensor = self._model.tensors.add() align_corners_tensor.name = op.name + '/align_corners:0' align_corners_tensor.data_type = mace_pb2.DT_INT32 align_corners_tensor.dims.extend([1]) align_corners_tensor.int32_data.extend([align_corners_arg.i]) op.input.extend([align_corners_tensor.name]) elif op.type == MaceOp.Concat.name: inputs = copy.deepcopy(op.input) for ipt in inputs: self.add_min_max_const_node(op, ipt, True, False) for ipt in inputs: self.add_min_max_const_node(op, ipt, False, True) dim_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_axis_str) dim_tensor = self._model.tensors.add() dim_tensor.name = op.name + '/dim:0' dim_tensor.data_type = mace_pb2.DT_INT32 dim_tensor.dims.extend([1]) dim_tensor.int32_data.extend([dim_arg.i]) op.input.insert(0, dim_tensor.name) elif op.type in [MaceOp.Softmax.name, MaceOp.Dequantize.name]: self.add_min_max_const_node(op, op.input[0]) if op.type != MaceOp.Dequantize.name: min_output_shape = op.output_shape.add() min_output_shape.dims.extend([1]) max_output_shape = op.output_shape.add() max_output_shape.dims.extend([1]) op.output_type.extend( [mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT]) for i in range(len(op.output_shape)): out_max_byte_size = reduce(mul, op.output_shape[i].dims) if op.output_type[i] == mace_pb2.DT_FLOAT: out_max_byte_size *= 4 op.out_max_byte_size.extend([out_max_byte_size]) op.padding = padding_mode[PaddingMode.NA] arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str) if arg is not None: op.padding = padding_mode[PaddingMode(arg.i)] if op.type == MaceOp.Eltwise.name: element_type = \ ConverterUtil.get_arg(op, MaceKeyword.mace_element_type_str).i if element_type == EltwiseType.SUM.value: op.type = HexagonOp.QuantizedAdd_8p8to8.name elif element_type == EltwiseType.SUB.value: op.type = HexagonOp.QuantizedSub_8p8to8.name elif element_type == EltwiseType.PROD.value: op.type = HexagonOp.QuantizedMul_8x8to8.name else: mace_check(False, "Hexagon does not support elementwise %s" % EltwiseType(element_type).name) elif op.type == MaceOp.Pooling.name: pooling_type_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_pooling_type_str) if PoolingType(pooling_type_arg.i) == PoolingType.AVG: op.type = HexagonOp.QuantizedAvgPool_8.name else: op.type = HexagonOp.QuantizedMaxPool_8.name else: op.type = self._hexagon_ops.map_nn_op(op.type) def add_const_node(self, name, val): if name not in self._consts: tensor = self._model.tensors.add() self._consts[name] = tensor tensor.name = name tensor.data_type = mace_pb2.DT_FLOAT tensor.dims.extend([1]) tensor.float_data.extend([val]) def add_min_max_const_node( self, this_op, tensor_name, add_min=True, add_max=True, diff_port=True): op, port = get_op_and_port_from_tensor(tensor_name) mace_check(port == 0, 'port should be 0 to add min max tensor then.') if tensor_name in self._quantize_activation_info: quantize_info = self._quantize_activation_info[tensor_name] minval = quantize_info.minval maxval = quantize_info.maxval is_activation = True elif tensor_name in self._consts: tensor = self._consts[tensor_name] minval = tensor.minval maxval = tensor.maxval is_activation = False else: raise Exception('Quantize info not found: ', tensor_name) if add_min: if is_activation and diff_port: min_tensor_name = op + ':1' else: min_tensor_name = op + '_min:0' self.add_const_node(min_tensor_name, minval) this_op.input.extend([min_tensor_name]) if add_max: if is_activation and diff_port: max_tensor_name = op + ':2' else: max_tensor_name = op + '_max:0' self.add_const_node(max_tensor_name, maxval) this_op.input.extend([max_tensor_name]) def add_shape_const_node(self, op, values, name): tensor = self._model.tensors.add() node_name = op.name + '/' + name tensor.name = node_name + ':0' tensor.data_type = mace_pb2.DT_INT32 tensor.dims.extend(values) return tensor.name def add_constant_min_max_for_first_op(self, op): minval = self._quantize_activation_info[op.input[0]].minval maxval = self._quantize_activation_info[op.input[0]].maxval input_op, _ = get_op_and_port_from_tensor(op.input[0]) input_min = input_op + '_min:0' input_max = input_op + '_max:0' self.add_const_node(input_min, minval) self.add_const_node(input_max, maxval) for i in range(len(op.input)): if op.input[i] == input_op + ':1': op.input[i] = input_min elif op.input[i] == input_op + ':2': op.input[i] = input_max def convert_input_output_node(self): quantize_input_op = self._model.op[0] mace_check( quantize_input_op.type == HexagonOp.QuantizeINPUT_f_to_8.name, "Not started with Quantize op.") del quantize_input_op.input[:] dequantize_output_op = self._model.op[-1] mace_check(dequantize_output_op.type == HexagonOp.DequantizeOUTPUT_8tof.name, "Not ended with Dequantize op.") dequantize_input = [input for input in dequantize_output_op.input] del dequantize_output_op.input[:] del dequantize_output_op.output_shape[:] del dequantize_output_op.output_type[:] del dequantize_output_op.out_max_byte_size[:] index = 1 while index < len(self._model.op) - 1: op = self._model.op[index] if op.type == HexagonOp.QuantizeINPUT_f_to_8.name: quantize_input_op.output.extend(op.output) quantize_input_op.output_shape.extend(op.output_shape) quantize_input_op.output_type.extend(op.output_type) quantize_input_op.out_max_byte_size.extend( op.out_max_byte_size) del self._model.op[index] elif op.type == HexagonOp.DequantizeOUTPUT_8tof.name: dequantize_output_op.input.extend(op.input) del self._model.op[index] index += 1 # input order matters dequantize_output_op.input.extend(dequantize_input) if self._option.device == DeviceType.HTA.value: # replace QuantizeINPUT_f_to_8 with INPUT quantize_input_op.type = HexagonOp.INPUT.name del quantize_input_op.output_shape[1:] del quantize_input_op.output_type[1:] del quantize_input_op.out_max_byte_size[1:] # replace first op's input min max with constant self.add_constant_min_max_for_first_op(self._model.op[1]) # replace DequantizeOUTPUT_8tof with OUTPUT dequantize_output_op.type = HexagonOp.OUTPUT.name del dequantize_output_op.input[1:] def add_node_id(self): node_id_counter = 0 node_id_map = {} for tensor in self._model.tensors: tensor.node_id = node_id_counter node_id_counter += 1 node_id_map[tensor.name] = tensor.node_id print("Hexagon op:") index = 0 for op in self._model.op: op.node_id = node_id_counter node_id_counter += 1 for output in op.output: node_id_map[output] = op.node_id if op.type not in [HexagonOp.QuantizeINPUT_f_to_8, HexagonOp.DequantizeOUTPUT_8tof.name]: index_str = str(index) index += 1 else: index_str = '' print('Op: %s (%s, node_id:%d, index:%s)' % (op.name, op.type, op.node_id, index_str)) for ipt in op.input: op_name, port = get_op_and_port_from_tensor(ipt) tensor_name = ipt if port == 0 else op_name + ':0' node_id = node_id_map[tensor_name] node_input = op.node_input.add() node_input.node_id = node_id node_input.output_port = int(port)