diff --git a/mace/core/net.cc b/mace/core/net.cc
index 9fad47b6c767e94e3546c49a20b07986fdd5f7df..f629e2068da0015a3744aa6a4b2ecc6c309e3739 100644
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -40,13 +40,19 @@ SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
   MACE_LATENCY_LOGGER(1, "Constructing SerialNet ", net_def->name());
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
     const auto &operator_def = net_def->op(idx);
-    VLOG(3) << "Creating operator " << operator_def.name() << "("
-            << operator_def.type() << ")";
-    OperatorDef temp_def(operator_def);
-    std::unique_ptr<OperatorBase> op(
-        op_registry->CreateOperator(temp_def, ws, type, mode));
-    if (op) {
-      operators_.emplace_back(std::move(op));
+    // TODO(liuqi): refactor based on PB
+    const int op_device =
+        ArgumentHelper::GetSingleArgument<OperatorDef, int>(
+            operator_def, "device", -1);
+    if (op_device == type) {
+      VLOG(3) << "Creating operator " << operator_def.name() << "("
+              << operator_def.type() << ")";
+      OperatorDef temp_def(operator_def);
+      std::unique_ptr<OperatorBase> op(
+          op_registry->CreateOperator(temp_def, ws, type, mode));
+      if (op) {
+        operators_.emplace_back(std::move(op));
+      }
     }
   }
 }
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index ce451491c454a1213e9db7bf446ad81152877bc3..3867cfe36f3b5606f793de0043cda6fad79429f6 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -136,7 +136,11 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
   // As DSP may have different data output type for each op,
   // we stick to the same concept.
   for (auto &op : net_def.op()) {
-    if (!op.mem_id().empty()) {
+    // TODO(liuqi): refactor based on PB
+    const int op_device =
+        ArgumentHelper::GetSingleArgument<OperatorDef, int>(
+            op, "device", -1);
+    if (op_device == device_type && !op.mem_id().empty()) {
       const DataType op_dtype = static_cast<DataType>(
           ArgumentHelper::GetSingleArgument<OperatorDef, int>(
               op, "T", static_cast<int>(DT_FLOAT)));
@@ -150,20 +154,29 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
     MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
     for (auto &mem_block : net_def.mem_arena().mem_block()) {
       if (device_type == DeviceType::GPU) {
-        std::unique_ptr<BufferBase> image_buf(
-            new Image({mem_block.x(), mem_block.y()}, dtype));
-        preallocated_allocator_.SetBuffer(mem_block.mem_id(),
-                                          std::move(image_buf));
+        // TODO(liuqi): refactor based on PB
+        if (mem_block.mem_id() >= 20000) {
+          std::unique_ptr<BufferBase> image_buf(
+              new Image({mem_block.x(), mem_block.y()}, dtype));
+          preallocated_allocator_.SetBuffer(mem_block.mem_id(),
+                                            std::move(image_buf));
+        }
       } else {
-        std::unique_ptr<BufferBase> tensor_buf(
-            new Buffer(GetDeviceAllocator(device_type), mem_block.x()));
-        preallocated_allocator_.SetBuffer(mem_block.mem_id(),
-                                          std::move(tensor_buf));
+        if (mem_block.mem_id() < 20000) {
+          std::unique_ptr<BufferBase> tensor_buf(
+              new Buffer(GetDeviceAllocator(device_type), mem_block.x()));
+          preallocated_allocator_.SetBuffer(mem_block.mem_id(),
+                                            std::move(tensor_buf));
+        }
       }
     }
     VLOG(3) << "Preallocate buffer to tensors";
     for (auto &op : net_def.op()) {
-      if (!op.mem_id().empty()) {
+      // TODO(liuqi): refactor based on PB
+      const int op_device =
+          ArgumentHelper::GetSingleArgument<OperatorDef, int>(
+              op, "device", -1);
+      if (op_device == device_type && !op.mem_id().empty()) {
         auto mem_ids = op.mem_id();
         int count = mem_ids.size();
         for (int i = 0; i < count; ++i) {
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index dda674328b5734b309f5b6375787aa9fadae7814..26c6c03607a37cb36eaaef6d6a44e4108877877d 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -16,6 +16,7 @@
 import argparse
 import sys
 import hashlib
 import os.path
+import copy
 from mace.proto import mace_pb2
 from mace.python.tools import tf_dsp_converter_lib
@@ -25,6 +26,7 @@
 from mace.python.tools.converter_tool import base_converter as cvt
 from mace.python.tools.converter_tool import tensorflow_converter
 from mace.python.tools.converter_tool import caffe_converter
 from mace.python.tools.converter_tool import transformer
+from mace.python.tools.convert_util import mace_check

 # ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
@@ -34,11 +36,14 @@ from mace.python.tools.converter_tool import transformer

 FLAGS = None

-data_type_map = {'DT_HALF': mace_pb2.DT_HALF,
-                 'DT_FLOAT': mace_pb2.DT_FLOAT}
 device_type_map = {'cpu': mace_pb2.CPU,
                    'gpu': mace_pb2.GPU,
                    'dsp': mace_pb2.HEXAGON}
+device_data_type_map = {
+    mace_pb2.CPU: mace_pb2.DT_FLOAT,
+    mace_pb2.GPU: mace_pb2.DT_HALF,
+    mace_pb2.HEXAGON: mace_pb2.DT_UINT8
+}


 def file_checksum(fname):
@@ -81,7 +86,7 @@ def main(unused_args):
     if FLAGS.platform not in ['tensorflow', 'caffe']:
         print ("platform %s is not supported." % FLAGS.platform)
         sys.exit(-1)
-    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp']:
+    if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', '']:
         print ("runtime %s is not supported." % FLAGS.runtime)
         sys.exit(-1)

@@ -95,8 +100,6 @@ def main(unused_args):
             sys.exit(-1)
     else:
         option = cvt.ConverterOption()
-        option.data_type = data_type_map[FLAGS.data_type]
-        option.device = device_type_map[FLAGS.runtime]
         option.winograd_enabled = bool(FLAGS.winograd)

         input_node_names = FLAGS.input_node.split(',')
@@ -117,8 +120,8 @@ def main(unused_args):

         print("Convert model to mace model.")
         if FLAGS.platform == 'tensorflow':
-            converter = tensorflow_converter.TensorflowConverter(option,
-                                                                 FLAGS.model_file)  # noqa
+            converter = tensorflow_converter.TensorflowConverter(
+                option, FLAGS.model_file)
         elif FLAGS.platform == 'caffe':
             converter = caffe_converter.CaffeConverter(option,
                                                        FLAGS.model_file,
@@ -126,16 +129,49 @@ def main(unused_args):
         output_graph_def = converter.run()

         print("Transform model to one that can better run on device.")
-        # TODO(liuqi/liyin): transform gpu/cpu and merge their ops
-        mace_transformer = transformer.Transformer(option, output_graph_def)
-        output_graph_def = mace_transformer.run()
+        if not FLAGS.runtime:
+            cpu_graph_def = copy.deepcopy(output_graph_def)
+            option.device = mace_pb2.CPU
+            option.data_type = device_data_type_map[mace_pb2.CPU]
+            option.disable_transpose_filters()
+            mace_cpu_transformer = transformer.Transformer(
+                option, cpu_graph_def)
+            cpu_graph_def = mace_cpu_transformer.run()
+            print "start optimize cpu memory."
+            memory_optimizer.optimize_cpu_memory(cpu_graph_def)
+            print "CPU memory optimization done."

-        print "start optimize memory."
-        if FLAGS.runtime == 'gpu':
-            memory_optimizer.optimize_gpu_memory(output_graph_def)
-        elif FLAGS.runtime == 'cpu':
-            memory_optimizer.optimize_cpu_memory(output_graph_def)
-        print "Memory optimization done."
+            option.device = mace_pb2.GPU
+            option.data_type = device_data_type_map[mace_pb2.GPU]
+            option.enable_transpose_filters()
+            mace_gpu_transformer = transformer.Transformer(
+                option, output_graph_def)
+            output_gpu_graph_def = mace_gpu_transformer.run()
+            print "start optimize gpu memory."
+            memory_optimizer.optimize_gpu_memory(output_gpu_graph_def)
+            print "GPU memory optimization done."
+
+            print "Merge cpu and gpu ops together"
+            output_graph_def.op.extend(cpu_graph_def.op)
+            output_graph_def.mem_arena.mem_block.extend(
+                cpu_graph_def.mem_arena.mem_block)
+            print "Merge done"
+        else:
+            option.device = device_type_map[FLAGS.runtime]
+            option.data_type = device_data_type_map[option.device]
+            mace_transformer = transformer.Transformer(
+                option, output_graph_def)
+            output_graph_def = mace_transformer.run()
+
+            print "start optimize memory."
+            if FLAGS.runtime == 'gpu':
+                memory_optimizer.optimize_gpu_memory(output_graph_def)
+            elif FLAGS.runtime == 'cpu':
+                memory_optimizer.optimize_cpu_memory(output_graph_def)
+            else:
+                mace_check(False, "runtime only support [gpu|cpu|dsp]")
+
+            print "Memory optimization done."

         if FLAGS.output_type == 'source':
             source_converter_lib.convert_to_source(
@@ -188,7 +224,7 @@ def parse_args():
         default="",
         help="File to save the output graph to.")
     parser.add_argument(
-        "--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
+        "--runtime", type=str, default="", help="Runtime: cpu/gpu/dsp")
     parser.add_argument(
         "--input_node",
         type=str,
@@ -196,11 +232,6 @@
         help="e.g., input_node")
     parser.add_argument(
         "--output_node", type=str, default="softmax", help="e.g., softmax")
-    parser.add_argument(
-        "--data_type",
-        type=str,
-        default='DT_FLOAT',
-        help="e.g., DT_HALF/DT_FLOAT")
     parser.add_argument(
         "--output_type", type=str, default="pb", help="output type: source/pb")
     parser.add_argument(
diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py
index 7e1039b906264912c1f0ea8f103a6c8c82e16b6a..e6ba45f451359b508037189a20a49db64a76721d 100644
--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -1,3 +1,18 @@
+# Copyright 2018 Xiaomi, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ + from enum import Enum from mace.proto import mace_pb2 @@ -117,6 +132,27 @@ class MaceKeyword(object): mace_axis_str = 'axis' mace_shape_str = 'shape' mace_winograd_filter_transformed = 'is_filter_transformed' + mace_device = 'device' + + +class TransformerRule(Enum): + REMOVE_IDENTITY_OP = 0 + TRANSFORM_GLOBAL_POOLING = 1 + FOLD_SOFTMAX = 2 + FOLD_BATCHNORM = 3, + FOLD_CONV_AND_BN = 4, + FOLD_DEPTHWISE_CONV_AND_BN = 5, + TRANSFORM_GPU_WINOGRAD = 6, + TRANSFORM_ADD_TO_BIASADD = 7, + FOLD_BIASADD = 8, + FOLD_ACTIVATION = 9, + TRANSPOSE_FILTERS = 10, + RESHAPE_FC_WEIGHT = 11, + TRANSPOSE_DATA_FORMAT = 12, + TRANSFORM_GLOBAL_CONV_TO_FC = 13, + TRANSFORM_BUFFER_IMAGE = 14, + ADD_DEVICE_AND_DATA_TYPE = 15, + SORT_BY_EXECUTION = 16 class ConverterInterface(object): @@ -162,6 +198,25 @@ class ConverterOption(object): self._data_type = mace_pb2.DT_FLOAT self._device = mace_pb2.CPU self._winograd_enabled = False + self._transformer_option = [ + TransformerRule.REMOVE_IDENTITY_OP, + TransformerRule.TRANSFORM_GLOBAL_POOLING, + TransformerRule.FOLD_SOFTMAX, + TransformerRule.FOLD_BATCHNORM, + TransformerRule.FOLD_CONV_AND_BN, + TransformerRule.FOLD_DEPTHWISE_CONV_AND_BN, + TransformerRule.TRANSFORM_GPU_WINOGRAD, + TransformerRule.TRANSFORM_ADD_TO_BIASADD, + TransformerRule.FOLD_BIASADD, + TransformerRule.FOLD_ACTIVATION, + TransformerRule.TRANSPOSE_FILTERS, + TransformerRule.RESHAPE_FC_WEIGHT, + TransformerRule.TRANSPOSE_DATA_FORMAT, + TransformerRule.TRANSFORM_GLOBAL_CONV_TO_FC, + TransformerRule.TRANSFORM_BUFFER_IMAGE, + TransformerRule.ADD_DEVICE_AND_DATA_TYPE, + TransformerRule.SORT_BY_EXECUTION, + ] @property def input_nodes(self): @@ -183,6 +238,10 @@ class ConverterOption(object): def winograd_enabled(self): return self._winograd_enabled + @property + def transformer_option(self): + return self._transformer_option + @input_nodes.setter def input_nodes(self, input_nodes): for node in input_nodes: @@ -211,6 +270,14 @@ class ConverterOption(object): def winograd_enabled(self, winograd_enabled): self._winograd_enabled = winograd_enabled + def disable_transpose_filters(self): + if TransformerRule.TRANSPOSE_FILTERS in self._transformer_option: + self._transformer_option.remove(TransformerRule.TRANSPOSE_FILTERS) + + def enable_transpose_filters(self): + if TransformerRule.TRANSPOSE_FILTERS not in self._transformer_option: + self._transformer_option.append(TransformerRule.TRANSPOSE_FILTERS) + class ConverterUtil(object): @staticmethod diff --git a/mace/python/tools/converter_tool/caffe_converter.py b/mace/python/tools/converter_tool/caffe_converter.py index 5c1e3421c7ea803c31de706221b96c3f8158a801..6af877648e150c1fa1d968774511e6a6e1c81091 100644 --- a/mace/python/tools/converter_tool/caffe_converter.py +++ b/mace/python/tools/converter_tool/caffe_converter.py @@ -1,3 +1,18 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import math import numpy as np import google.protobuf.text_format @@ -325,10 +340,6 @@ class CaffeConverter(base_converter.ConverterInterface): op.input.extend(caffe_op.layer.bottom) op.output.extend(caffe_op.layer.top) - data_type_arg = op.arg.add() - data_type_arg.name = 'T' - data_type_arg.i = self._option.data_type - ConverterUtil.add_data_format_arg(op, DataFormat.NCHW) return op diff --git a/mace/python/tools/converter_tool/shape_inference.py b/mace/python/tools/converter_tool/shape_inference.py index a260be1ca1f1a67f8f90a2b480f18cb1888a5d00..e77f8be183ae70e8321dd15f8108362c464b5b7f 100644 --- a/mace/python/tools/converter_tool/shape_inference.py +++ b/mace/python/tools/converter_tool/shape_inference.py @@ -1,3 +1,18 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import math import numpy as np diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index c2c5b3d0165304e409bac75efe8c38e3b859232e..81bf2027eb4e382df13146d3c8a102c67705cf8e 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -1,3 +1,18 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import math import numpy as np import tensorflow as tf @@ -197,11 +212,6 @@ class TensorflowConverter(base_converter.ConverterInterface): for tf_output in tf_op.outputs: output_shape = op.output_shape.add() output_shape.dims.extend(tf_output.shape.as_list()) - op.output_type.append(self._option.data_type) - - data_type_arg = op.arg.add() - data_type_arg.name = 'T' - data_type_arg.i = self._option.data_type ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) @@ -289,7 +299,6 @@ class TensorflowConverter(base_converter.ConverterInterface): op.input.extend([scale_name, offset_name]) del op.output[1:] del op.output_shape[1:] - del op.output_type[1:] def convert_pooling(self, tf_op): op = self.convert_general_op(tf_op) diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 6dc51b7d9c44e6d9a1373ba89b5f97dfda137c2b..5ccd36975c26ab4457c2141f24745feab1ee855f 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -1,3 +1,18 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import enum import numpy as np @@ -11,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import FilterFormat from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceKeyword from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import TransformerRule from mace.python.tools.convert_util import mace_check OPENCL_IMAGE_MAX_SIZE = 16384 @@ -36,23 +52,52 @@ class Transformer(base_converter.ConverterInterface): def __init__(self, option, model): # DO NOT reorder the following transformers - self._registered_transformers = [ - self.remove_identity_op, - self.transform_global_pooling, - self.fold_softmax, - self.fold_batchnorm, - self.fold_conv_and_bn, # data_format related - self.fold_depthwise_conv_and_bn, # data_format related - self.transform_gpu_winograd, # data_format related - self.transform_add_to_biasadd, - self.fold_biasadd, - self.fold_activation, - self.transpose_filters, - self.transpose_data_format, - self.transform_global_conv_to_fc, - self.transform_buffer_image, - self.sort_by_execution, + self._registered_transformers_order = [ + TransformerRule.REMOVE_IDENTITY_OP, + TransformerRule.TRANSFORM_GLOBAL_POOLING, + TransformerRule.FOLD_SOFTMAX, + TransformerRule.FOLD_BATCHNORM, + TransformerRule.FOLD_CONV_AND_BN, + TransformerRule.FOLD_DEPTHWISE_CONV_AND_BN, + TransformerRule.TRANSFORM_GPU_WINOGRAD, + TransformerRule.TRANSFORM_ADD_TO_BIASADD, + TransformerRule.FOLD_BIASADD, + TransformerRule.FOLD_ACTIVATION, + TransformerRule.TRANSPOSE_FILTERS, + TransformerRule.RESHAPE_FC_WEIGHT, + TransformerRule.TRANSPOSE_DATA_FORMAT, + TransformerRule.TRANSFORM_GLOBAL_CONV_TO_FC, + TransformerRule.TRANSFORM_BUFFER_IMAGE, + TransformerRule.ADD_DEVICE_AND_DATA_TYPE, + TransformerRule.SORT_BY_EXECUTION, ] + self._registered_transformers = { + TransformerRule.REMOVE_IDENTITY_OP: self.remove_identity_op, + TransformerRule.TRANSFORM_GLOBAL_POOLING: + self.transform_global_pooling, + TransformerRule.FOLD_SOFTMAX: self.fold_softmax, + TransformerRule.FOLD_BATCHNORM: self.fold_batchnorm, + TransformerRule.FOLD_CONV_AND_BN: + self.fold_conv_and_bn, # data_format related + TransformerRule.FOLD_DEPTHWISE_CONV_AND_BN: + self.fold_depthwise_conv_and_bn, # data_format related + TransformerRule.TRANSFORM_GPU_WINOGRAD: + self.transform_gpu_winograd, # data_format related + TransformerRule.TRANSFORM_ADD_TO_BIASADD: + self.transform_add_to_biasadd, + TransformerRule.FOLD_BIASADD: self.fold_biasadd, + TransformerRule.FOLD_ACTIVATION: self.fold_activation, + TransformerRule.TRANSPOSE_FILTERS: self.transpose_filters, + TransformerRule.RESHAPE_FC_WEIGHT: self.reshape_fc_weight, + TransformerRule.TRANSPOSE_DATA_FORMAT: self.transpose_data_format, + TransformerRule.TRANSFORM_GLOBAL_CONV_TO_FC: + self.transform_global_conv_to_fc, + TransformerRule.TRANSFORM_BUFFER_IMAGE: + self.transform_buffer_image, + 
TransformerRule.ADD_DEVICE_AND_DATA_TYPE: + self.add_device_and_data_type, + TransformerRule.SORT_BY_EXECUTION: self.sort_by_execution, + } self._option = option self._model = model @@ -67,12 +112,14 @@ class Transformer(base_converter.ConverterInterface): self._target_data_format = DataFormat.NCHW def run(self): - for transformer in self._registered_transformers: - while True: - self.construct_ops_and_consumers() - changed = transformer() - if not changed: - break + for key in self._registered_transformers_order: + if key in self._option.transformer_option: + transformer = self._registered_transformers[key] + while True: + self.construct_ops_and_consumers() + changed = transformer() + if not changed: + break return self._model @@ -404,19 +451,16 @@ class Transformer(base_converter.ConverterInterface): wt_output_shape.dims.extend( [16, in_channels, wt_output_width, 1]) - arg = wt_op.arg.add() - arg.name = 'T' - arg.i = self._option.data_type - if ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str) \ is not None: padding_arg = wt_op.arg.add() padding_arg.name = MaceKeyword.mace_padding_str - padding_arg.i = ConverterUtil.get_arg(op, - MaceKeyword.mace_padding_str).i # noqa - elif ConverterUtil.get_arg(op, - MaceKeyword.mace_padding_values_str) is not None: # noqa + padding_arg.i = ConverterUtil.get_arg( + op, MaceKeyword.mace_padding_str).i + elif ConverterUtil.get_arg( + op, MaceKeyword.mace_padding_values_str)\ + is not None: padding_arg = wt_op.arg.add() padding_arg.name = MaceKeyword.mace_padding_values_str padding_arg.ints.extend(ConverterUtil.get_arg( @@ -432,9 +476,6 @@ class Transformer(base_converter.ConverterInterface): matmul_output_shape.dims.extend( [16, out_channels, wt_output_width, 1]) - arg = matmul_op.arg.add() - arg.name = 'T' - arg.i = self._option.data_type arg = matmul_op.arg.add() arg.name = MaceKeyword.mace_winograd_filter_transformed arg.i = 1 @@ -451,9 +492,6 @@ class Transformer(base_converter.ConverterInterface): iwt_output_shape = iwt_op.output_shape.add() iwt_output_shape.dims.extend(op.output_shape[0].dims) - arg = iwt_op.arg.add() - arg.name = 'T' - arg.i = self._option.data_type batch_arg = iwt_op.arg.add() batch_arg.name = 'batch' batch_arg.i = batch @@ -618,10 +656,6 @@ class Transformer(base_converter.ConverterInterface): dims_arg.name = MaceKeyword.mace_dims_str dims_arg.ints.extend([0, 3, 1, 2]) - arg = op.arg.add() - arg.name = 'T' - arg.i = self._option.data_type - for output_node in self._option.output_nodes.values(): output_name = MaceKeyword.mace_output_node_name \ + '_' + output_node.name @@ -639,75 +673,43 @@ class Transformer(base_converter.ConverterInterface): dims_arg.name = MaceKeyword.mace_dims_str dims_arg.ints.extend([0, 2, 3, 1]) - arg = op.arg.add() - arg.name = 'T' - arg.i = self._option.data_type - return False def transpose_filters(self): net = self._model filter_format = self.filter_format() - # TODO(liyin/liuqi): remove this if-condition after combine cpu/gpu - if self._option.device == mace_pb2.CPU: - print("Transpose filters to OIHW") - # transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM) - if filter_format == FilterFormat.HWIO: - for op in net.op: - if op.type == MaceOp.Conv2D.name \ - or op.type == MaceOp.Deconv2D.name \ - or op.type == MaceOp.DepthwiseConv2d.name: - if ConverterUtil.get_arg(op, - MaceKeyword.mace_winograd_filter_transformed) is None: # noqa - filter = self._consts[op.input[1]] - filter_data = np.array(filter.float_data).reshape( - filter.dims) - filter_data = filter_data.transpose(3, 2, 0, 1) - 
filter.float_data[:] = filter_data.flat - filter.dims[:] = filter_data.shape - self.set_filter_format(FilterFormat.OIHW) - - elif self._option.device == mace_pb2.GPU: - # TODO(liyin/liuqi): remove this whole logic after combine cpu/gpu - print("Transpose filters to HWOI/HWIM") + print("Transpose filters to OIHW") + # transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM) + if filter_format == FilterFormat.HWIO: for op in net.op: if op.type == MaceOp.Conv2D.name \ or op.type == MaceOp.Deconv2D.name \ or op.type == MaceOp.DepthwiseConv2d.name: - filter = self._consts[op.input[1]] - filter_data = np.array(filter.float_data).reshape( - filter.dims) - # transpose filter to HWOI/HWIM for - # tensorflow and caffe (OIHW/MIHW) - if filter_format == FilterFormat.HWIO \ - and (op.type == MaceOp.Conv2D.name - or op.type == MaceOp.Deconv2D.name): - filter_data = filter_data.transpose(0, 1, 3, 2) + if ConverterUtil.get_arg( + op, MaceKeyword.mace_winograd_filter_transformed)\ + is None: + filter = self._consts[op.input[1]] + filter_data = np.array(filter.float_data).reshape( + filter.dims) + filter_data = filter_data.transpose(3, 2, 0, 1) filter.float_data[:] = filter_data.flat filter.dims[:] = filter_data.shape - elif filter_format == FilterFormat.OIHW: - if op.type == MaceOp.Conv2D.name \ - or op.type == MaceOp.Deconv2D.name: - filter_data = filter_data.transpose(2, 3, 0, 1) - filter.float_data[:] = filter_data.flat - filter.dims[:] = filter_data.shape - elif op.type == MaceOp.DepthwiseConv2d.name: - filter_data = filter_data.transpose(2, 3, 1, 0) - filter.float_data[:] = filter_data.flat - filter.dims[:] = filter_data.shape - - if op.type == MaceOp.FullyConnected.name: - weight = self._consts[op.input[1]] - input_shape = list(self._producer[op.input[0]] - .output_shape[0].dims) - weight_shape = [weight.dims[0]] + input_shape[1:] - # OCHW -> OHWC - weight_data = np.array(weight.float_data).reshape( - weight_shape) - weight_data = weight_data.transpose(0, 2, 3, 1) - weight.float_data[:] = weight_data.flat - self.set_filter_format(FilterFormat.HWOI) + self.set_filter_format(FilterFormat.OIHW) + + return False + + def reshape_fc_weight(self): + net = self._model + for op in net.op: + if op.type == MaceOp.FullyConnected.name: + weight = self._consts[op.input[1]] + # NCHW + input_shape = list(self._producer[op.input[0]] + .output_shape[0].dims) + weight_shape = [weight.dims[0]] + input_shape[1:] + del weight.dims[:] + weight.dims.extend(weight_shape) return False @@ -727,9 +729,6 @@ class Transformer(base_converter.ConverterInterface): arg = op_def.arg.add() arg.name = MaceKeyword.mace_mode arg.i = 0 - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self._option.data_type op.input[input_idx] = output_name @@ -788,9 +787,6 @@ class Transformer(base_converter.ConverterInterface): arg = op_def.arg.add() arg.name = MaceKeyword.mace_buffer_type arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self._option.data_type for output_node in self._option.output_nodes.values(): output_name = MaceKeyword.mace_output_node_name \ @@ -806,9 +802,6 @@ class Transformer(base_converter.ConverterInterface): arg = op_def.arg.add() arg.name = MaceKeyword.mace_buffer_type arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value - arg = op_def.arg.add() - arg.name = 'T' - arg.i = self._option.data_type return False @@ -885,6 +878,19 @@ class Transformer(base_converter.ConverterInterface): in_channels * filter_width * filter_height][:] + def add_device_and_data_type(self): + # TODO(liuqi) 
add device definition in OperatorDef + net = self._model + for op in net.op: + arg = op.arg.add() + arg.name = MaceKeyword.mace_device + arg.i = self._option.device + data_type_arg = op.arg.add() + data_type_arg.name = 'T' + data_type_arg.i = self._option.data_type + + return False + def sort_dfs(self, op, visited, sorted_nodes): visited.update([op.name]) if len(op.input) > 0: diff --git a/mace/python/tools/source_converter_lib.py b/mace/python/tools/source_converter_lib.py index 8b08c11dcaf7b6cbc4862836c5a065d623d6dfdd..5b43e61b07f89e95b849fee56aea7bc3f83381af 100644 --- a/mace/python/tools/source_converter_lib.py +++ b/mace/python/tools/source_converter_lib.py @@ -167,7 +167,6 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, tensor_info=tensor_info, tensor=t, tag=model_tag, - runtime=runtime, offset=offset, ) model_data.extend(tensor_info.data) diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index 2e032b84185f3ff270b89d6d7446e845a4b35b2e..a1271b28b433c46b5caf5052d3db9562b032dcaf 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -55,6 +55,7 @@ void BufferToImage(const std::string &input_name, const std::string &output_name, const int buffer_type, const std::vector &mem_ids, + const DeviceType device_type, NetDef *net_def, const int mode = NetMode::NORMAL) { OperatorDef operator_def; @@ -64,6 +65,7 @@ void BufferToImage(const std::string &input_name, .Output(output_name) .AddIntArg("buffer_type", buffer_type) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .AddIntArg("mode", mode) .Finalize(&operator_def); @@ -76,6 +78,7 @@ template void ImageToBuffer(const std::string &input_name, const std::string &output_name, const int buffer_type, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; @@ -84,6 +87,7 @@ void ImageToBuffer(const std::string &input_name, .Output(output_name) .AddIntArg("buffer_type", buffer_type) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); net_def->add_op()->CopyFrom(operator_def); @@ -94,6 +98,7 @@ void Conv3x3(const std::string &input_name, const std::string &filter_name, const std::string &output_name, const std::vector &mem_ids, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; ops::test::OpDefBuilder("Conv2D", "Conv2dOp") @@ -104,6 +109,7 @@ void Conv3x3(const std::string &input_name, .AddIntArg("padding", Padding::SAME) .AddIntsArg("dilations", {1, 1}) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); operator_def.set_mem_id(mem_ids); @@ -113,6 +119,7 @@ void Conv3x3(const std::string &input_name, template void Relu(const std::string &input_name, const std::string &output_name, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; ops::test::OpDefBuilder("Activation", "ReluTest") @@ -120,6 +127,7 @@ void Relu(const std::string &input_name, .Output(output_name) .AddStringArg("activation", "RELU") .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); net_def->add_op()->CopyFrom(operator_def); @@ -195,7 +203,8 @@ std::map AddMemoryOptimization( const std::vector> &output_shapes, NetDef *net_def) { std::map res; - int mem_id = 0; + // TODO(liuqi) refactor based on PB + int mem_id = 20000; size_t input_shape_size = 
input_shapes.size(); uint32_t in_mem_block_x = 0; uint32_t in_mem_block_y = 0; @@ -269,21 +278,25 @@ void MaceRunFunc(const int in_out_size) { BufferToImage(input_name, input_names[i], mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, + device, &net_def); } BufferToImage(filter_tensor_name, filter_tensor_img_name, - mace::kernels::CONV2D_FILTER, {}, + mace::kernels::CONV2D_FILTER, {}, device, &net_def, NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, + device, &net_def); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", output_names[i]); ImageToBuffer(output_names[i], output_name, - mace::kernels::IN_OUT_CHANNEL, &net_def); + mace::kernels::IN_OUT_CHANNEL, + device, + &net_def); } const std::string file_path ="/data/local/tmp/mace"; diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index be7b007f803d477cfdbfab8d69381f19136cb177..776fa6744c231aadff21aa592b52e43900f423b5 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -65,6 +65,7 @@ void BufferToImage(const std::string &input_name, const std::string &output_name, const int buffer_type, const std::vector &mem_ids, + const DeviceType device_type, NetDef *net_def, const int mode = NetMode::NORMAL) { OperatorDef operator_def; @@ -74,6 +75,7 @@ void BufferToImage(const std::string &input_name, .Output(output_name) .AddIntArg("buffer_type", buffer_type) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .AddIntArg("mode", mode) .Finalize(&operator_def); @@ -86,6 +88,7 @@ template void ImageToBuffer(const std::string &input_name, const std::string &output_name, const int buffer_type, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; @@ -94,6 +97,7 @@ void ImageToBuffer(const std::string &input_name, .Output(output_name) .AddIntArg("buffer_type", buffer_type) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); net_def->add_op()->CopyFrom(operator_def); @@ -104,6 +108,7 @@ void Conv3x3(const std::string &input_name, const std::string &filter_name, const std::string &output_name, const std::vector &mem_ids, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; ops::test::OpDefBuilder("Conv2D", "Conv2dOp") @@ -114,6 +119,7 @@ void Conv3x3(const std::string &input_name, .AddIntArg("padding", Padding::SAME) .AddIntsArg("dilations", {1, 1}) .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); operator_def.set_mem_id(mem_ids); @@ -123,6 +129,7 @@ void Conv3x3(const std::string &input_name, template void Relu(const std::string &input_name, const std::string &output_name, + const DeviceType device_type, NetDef *net_def) { OperatorDef operator_def; ops::test::OpDefBuilder("Activation", "ReluTest") @@ -130,6 +137,7 @@ void Relu(const std::string &input_name, .Output(output_name) .AddStringArg("activation", "RELU") .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); net_def->add_op()->CopyFrom(operator_def); @@ -205,7 +213,8 @@ std::map AddMemoryOptimization( const std::vector> &output_shapes, NetDef *net_def) { std::map res; - int mem_id = 0; + // TODO(liuqi) refactor based on PB + int mem_id = 20000; size_t 
input_shape_size = input_shapes.size(); uint32_t in_mem_block_x = 0; uint32_t in_mem_block_y = 0; @@ -279,21 +288,24 @@ void MaceRun(const int in_out_size, BufferToImage(input_name, input_names[i], mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, + device, &net_def); } BufferToImage(filter_tensor_name, filter_tensor_img_name, - mace::kernels::CONV2D_FILTER, {}, + mace::kernels::CONV2D_FILTER, {}, device, &net_def, NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, - &net_def); + device, &net_def); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", output_names[i]); ImageToBuffer(output_names[i], output_name, - mace::kernels::IN_OUT_CHANNEL, &net_def); + mace::kernels::IN_OUT_CHANNEL, + device, + &net_def); } MaceEngine engine(&net_def, device, input_names, output_names); diff --git a/tools/mace_tools.py b/tools/mace_tools.py index f8a603fb08872d4f19bd8f6885feb18f585aa166..2af843a63f677e0e68c6ca3845ac872f5ece1862 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -62,27 +62,23 @@ def get_target_socs(configs): return target_socs -def get_data_and_device_type(runtime): - data_type = "" +def parse_device_type(runtime): device_type = "" if runtime == "dsp": - data_type = "DT_UINT8" device_type = "HEXAGON" elif runtime == "gpu": - data_type = "DT_HALF" device_type = "GPU" elif runtime == "cpu": - data_type = "DT_FLOAT" device_type = "CPU" - return data_type, device_type + return device_type def get_hexagon_mode(configs): runtime_list = [] for model_name in configs["models"]: - model_runtime = configs["models"][model_name]["runtime"] + model_runtime = configs["models"][model_name].get("runtime", "") runtime_list.append(model_runtime.lower()) global_runtime = "" @@ -114,7 +110,7 @@ def model_benchmark_stdout_processor(stdout, abi, serialno, model_name, - runtime): + device_type): metrics = [0] * 3 for line in stdout.split('\n'): line = line.strip() @@ -138,14 +134,14 @@ def model_benchmark_stdout_processor(stdout, f.write("model_name,device_name,soc,abi,runtime," "init,warmup,run_avg\n") - data_str = "{model_name},{device_name},{soc},{abi},{runtime}," \ + data_str = "{model_name},{device_name},{soc},{abi},{device_type}," \ "{init},{warmup},{run_avg}\n" \ .format( model_name=model_name, device_name=device_name, soc=target_soc, abi=abi, - runtime=runtime, + device_type=device_type, init=metrics[0], warmup=metrics[1], run_avg=metrics[2] @@ -154,8 +150,7 @@ def model_benchmark_stdout_processor(stdout, f.write(data_str) -def tuning_run(runtime, - target_abi, +def tuning_run(target_abi, serialno, vlog_level, embed_model_data, @@ -205,7 +200,7 @@ def tuning_run(runtime, if running_round > 0 and FLAGS.collect_report: model_benchmark_stdout_processor( - stdout, target_abi, serialno, model_name, runtime) + stdout, target_abi, serialno, model_name, device_type) def build_mace_run_prod(hexagon_mode, runtime, target_abi, @@ -222,7 +217,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, strip = "never" debug = True - if runtime == "gpu": + if not runtime or runtime == "gpu": gen_opencl_and_tuning_code(target_abi, serialno, [], False) sh_commands.bazel_build( mace_run_target, @@ -234,19 +229,14 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, sh_commands.update_mace_run_lib(model_output_dir, model_name, embed_model_data) - tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data, + 
device_type = parse_device_type("gpu") + tuning_run(target_abi, serialno, vlog_level, embed_model_data, model_output_dir, input_nodes, output_nodes, input_shapes, output_shapes, model_name, device_type, running_round=0, restart_round=1, out_of_range_check=False, phone_data_dir=phone_data_dir, tuning=tuning, limit_opencl_kernel_time=limit_opencl_kernel_time) - tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data, - model_output_dir, input_nodes, output_nodes, input_shapes, - output_shapes, model_name, device_type, running_round=0, - restart_round=1, out_of_range_check=True, - phone_data_dir=phone_data_dir, tuning=False) - gen_opencl_and_tuning_code(target_abi, serialno, [model_output_dir], True) sh_commands.bazel_build( @@ -391,8 +381,7 @@ def parse_model_configs(): print("'platform' must be 'tensorflow' or 'caffe'") exit(1) - for key in ["model_file_path", "model_sha256_checksum", - "runtime"]: + for key in ["model_file_path", "model_sha256_checksum"]: value = model_config.get(key, "") if value == "": print("CONFIG ERROR:") @@ -529,6 +518,11 @@ def parse_args(): type=str, default="", help="Valgrind command args.") + parser.add_argument( + "--validation_runtime", + type=str, + default="cpu", + help="validation runtime.") return parser.parse_known_args() @@ -541,9 +535,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, print '===================', model_name, '===================' model_config = configs["models"][model_name] input_file_list = model_config["validation_inputs_data"] - data_type, device_type = get_data_and_device_type( - model_config["runtime"]) - + model_runtime = model_config.get("runtime", "") + model_device_type = parse_device_type(model_runtime) + run_device_type = model_device_type + if not run_device_type: + run_device_type = parse_device_type(FLAGS.validation_runtime) # Create model build directory model_path_digest = md5sum(model_config["model_file_path"]) model_output_base_dir = "%s/%s/%s/%s/%s" % ( @@ -581,7 +577,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, if FLAGS.mode == "build" or FLAGS.mode == "all": build_mace_run_prod(hexagon_mode, - model_config["runtime"], + model_runtime, target_abi, serialno, vlog_level, @@ -592,7 +588,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["input_shapes"], model_config["output_shapes"], model_name, - device_type, + model_device_type, FLAGS.round, FLAGS.restart_round, FLAGS.tuning, @@ -607,8 +603,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ FLAGS.mode == "all": - tuning_run(model_config["runtime"], - target_abi, + tuning_run(target_abi, serialno, vlog_level, embed_model_data, @@ -618,7 +613,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["input_shapes"], model_config["output_shapes"], model_name, - device_type, + run_device_type, FLAGS.round, FLAGS.restart_round, FLAGS.out_of_range_check, @@ -641,7 +636,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_config["input_shapes"], model_config["output_shapes"], model_name, - device_type, + run_device_type, phone_data_dir, FLAGS.omp_num_threads, FLAGS.cpu_affinity_policy, @@ -654,7 +649,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_file_path, weight_file_path, model_config["platform"], - model_config["runtime"], + run_device_type, model_config["input_nodes"], 
model_config["output_nodes"], model_config["input_shapes"], @@ -746,8 +741,7 @@ def main(unused_args): for model_name in configs["models"]: print '===================', model_name, '===================' model_config = configs["models"][model_name] - data_type, device_type = get_data_and_device_type( - model_config["runtime"]) + runtime = model_config.get("runtime", "") # Create model build directory model_path_digest = md5sum(model_config["model_file_path"]) @@ -778,8 +772,7 @@ def main(unused_args): model_config["model_sha256_checksum"], ",".join(model_config["input_nodes"]), ",".join(model_config["output_nodes"]), - data_type, - model_config["runtime"], + runtime, model_name, ":".join(model_config["input_shapes"]), model_config["dsp_mode"], diff --git a/tools/sh_commands.py b/tools/sh_commands.py index e74059fc92d6f96f9530b2e83688165951305f29..5832f35bceb4ccadcf73c215d7c799ba8b869d09 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -465,7 +465,6 @@ def gen_model_code(model_codegen_dir, model_sha256_checksum, input_nodes, output_nodes, - data_type, runtime, model_tag, input_shapes, @@ -489,7 +488,6 @@ def gen_model_code(model_codegen_dir, "--output=%s" % model_codegen_dir + "/model.cc", "--input_node=%s" % input_nodes, "--output_node=%s" % output_nodes, - "--data_type=%s" % data_type, "--runtime=%s" % runtime, "--output_type=source", "--template=%s" % "mace/python/tools", @@ -703,7 +701,7 @@ def validate_model(abi, model_file_path, weight_file_path, platform, - runtime, + device_type, input_nodes, output_nodes, input_shapes, @@ -727,7 +725,7 @@ def validate_model(abi, if platform == "tensorflow": validate(platform, model_file_path, "", "%s/%s" % (model_output_dir, input_file_name), - "%s/%s" % (model_output_dir, output_file_name), runtime, + "%s/%s" % (model_output_dir, output_file_name), device_type, ":".join(input_shapes), ":".join(output_shapes), ",".join(input_nodes), ",".join(output_nodes)) elif platform == "caffe": @@ -743,7 +741,8 @@ def validate_model(abi, logger.error('There is no caffe python module.') validate(platform, model_file_path, weight_file_path, "%s/%s" % (model_output_dir, input_file_name), - "%s/%s" % (model_output_dir, output_file_name), runtime, + "%s/%s" % (model_output_dir, output_file_name), + device_type, ":".join(input_shapes), ":".join(output_shapes), ",".join(input_nodes), ",".join(output_nodes)) elif caffe_env == common.CaffeEnvType.DOCKER: @@ -806,7 +805,7 @@ def validate_model(abi, "--weight_file=/mace/%s" % weight_file_name, "--input_file=/mace/%s" % input_file_name, "--mace_out_file=/mace/%s" % output_file_name, - "--mace_runtime=%s" % runtime, + "--device_type=%s" % device_type, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), diff --git a/tools/validate.py b/tools/validate.py index eb767377639e733dac633735505b02b80357697c..dba6a3e2db2b2b38b53d4f1c925645f246ebd3a2 100644 --- a/tools/validate.py +++ b/tools/validate.py @@ -44,7 +44,7 @@ def load_data(file): return np.empty([0]) -def compare_output(platform, mace_runtime, output_name, mace_out_value, +def compare_output(platform, device_type, output_name, mace_out_value, out_value): if mace_out_value.size != 0: out_value = out_value.reshape(-1) @@ -53,9 +53,9 @@ def compare_output(platform, mace_runtime, output_name, mace_out_value, similarity = (1 - spatial.distance.cosine(out_value, mace_out_value)) print output_name, 'MACE VS', platform.upper( ), 'similarity: ', similarity - if (mace_runtime == "cpu" and 
similarity > 0.999) or \ - (mace_runtime == "gpu" and similarity > 0.995) or \ - (mace_runtime == "dsp" and similarity > 0.930): + if (device_type == "CPU" and similarity > 0.999) or \ + (device_type == "GPU" and similarity > 0.995) or \ + (device_type == "HEXAGON" and similarity > 0.930): print '===================Similarity Test Passed==================' else: print '===================Similarity Test Failed==================' @@ -65,7 +65,7 @@ def compare_output(platform, mace_runtime, output_name, mace_out_value, sys.exit(-1) -def validate_tf_model(platform, mace_runtime, model_file, input_file, +def validate_tf_model(platform, device_type, model_file, input_file, mace_out_file, input_names, input_shapes, output_names): import tensorflow as tf if not os.path.isfile(model_file): @@ -100,11 +100,11 @@ def validate_tf_model(platform, mace_runtime, model_file, input_file, output_file_name = common.formatted_file_name( mace_out_file, output_names[i]) mace_out_value = load_data(output_file_name) - compare_output(platform, mace_runtime, output_names[i], + compare_output(platform, device_type, output_names[i], mace_out_value, output_values[i]) -def validate_caffe_model(platform, mace_runtime, model_file, input_file, +def validate_caffe_model(platform, device_type, model_file, input_file, mace_out_file, weight_file, input_names, input_shapes, output_names, output_shapes): os.environ['GLOG_minloglevel'] = '1' # suprress Caffe verbose prints @@ -144,12 +144,12 @@ def validate_caffe_model(platform, mace_runtime, model_file, input_file, output_file_name = common.formatted_file_name( mace_out_file, output_names[i]) mace_out_value = load_data(output_file_name) - compare_output(platform, mace_runtime, output_names[i], mace_out_value, + compare_output(platform, device_type, output_names[i], mace_out_value, value) def validate(platform, model_file, weight_file, input_file, mace_out_file, - mace_runtime, input_shape, output_shape, input_node, output_node): + device_type, input_shape, output_shape, input_node, output_node): input_names = [name for name in input_node.split(',')] input_shape_strs = [shape for shape in input_shape.split(':')] input_shapes = [[int(x) for x in shape.split(',')] @@ -158,14 +158,14 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file, assert len(input_names) == len(input_shapes) if platform == 'tensorflow': - validate_tf_model(platform, mace_runtime, model_file, input_file, + validate_tf_model(platform, device_type, model_file, input_file, mace_out_file, input_names, input_shapes, output_names) elif platform == 'caffe': output_shape_strs = [shape for shape in output_shape.split(':')] output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs] - validate_caffe_model(platform, mace_runtime, model_file, input_file, + validate_caffe_model(platform, device_type, model_file, input_file, mace_out_file, weight_file, input_names, input_shapes, output_names, output_shapes) @@ -194,7 +194,7 @@ def parse_args(): default="", help="mace output file to load.") parser.add_argument( - "--mace_runtime", type=str, default="gpu", help="mace runtime device.") + "--device_type", type=str, default="", help="mace runtime device.") parser.add_argument( "--input_shape", type=str, default="1,64,64,3", help="input shape.") parser.add_argument( @@ -214,7 +214,7 @@ if __name__ == '__main__': FLAGS.weight_file, FLAGS.input_file, FLAGS.mace_out_file, - FLAGS.mace_runtime, + FLAGS.device_type, FLAGS.input_shape, FLAGS.output_shape, FLAGS.input_node,