From ced4a49dccf3f713a23104ab893986be5f1b2c34 Mon Sep 17 00:00:00 2001 From: liuqi Date: Mon, 28 May 2018 15:56:21 +0800 Subject: [PATCH] Refactor mace_tools and yaml format for better usage. --- .gitlab-ci.yml | 1 + .../create_a_model_deployment.rst | 12 +- docs/getting_started/how_to_build.rst | 142 +- .../models/demo_app_models.yaml | 50 +- mace/benchmark/BUILD | 2 - mace/core/BUILD | 8 +- mace/core/mace.cc | 9 +- mace/kernels/opencl/concat.cc | 4 +- mace/kernels/opencl/helper.cc | 5 +- mace/kernels/opencl/slice.cc | 4 +- mace/mace.bzl | 12 - mace/python/tools/BUILD | 7 +- mace/python/tools/converter.py | 103 +- .../converter_tool/tensorflow_converter.py | 6 - .../tools/converter_tool/transformer.py | 7 +- mace/python/tools/encrypt_opencl_codegen.py | 2 - .../python/tools/mace_engine_factory.h.jinja2 | 2 +- .../tools/mace_engine_factory_codegen.py | 1 - mace/python/tools/model_saver.py | 315 +++ mace/python/tools/operator.jinja2 | 6 +- mace/python/tools/source_converter_lib.py | 99 - mace/python/tools/tensor_util.py | 173 -- mace/utils/BUILD | 24 +- mace/utils/tuner.h | 20 +- tools/bazel.rc | 57 + tools/bazel_adb_run.py | 61 +- tools/common.py | 96 +- tools/example.yaml | 46 - tools/mace_tools.py | 1694 ++++++++++------- tools/sh_commands.py | 437 ++--- tools/validate.py | 34 +- 31 files changed, 1922 insertions(+), 1517 deletions(-) create mode 100644 mace/python/tools/model_saver.py delete mode 100644 mace/python/tools/source_converter_lib.py delete mode 100644 mace/python/tools/tensor_util.py create mode 100644 tools/bazel.rc delete mode 100644 tools/example.yaml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d36bbed6..30e20590 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,4 +68,5 @@ platform_compitable_tests: stage: platform_compitable_tests script: - mkdir -p mace/codegen/version && bash mace/tools/git/gen_version_source.sh mace/codegen/version/version.cc + - mkdir -p mace/codegen/tuning && python mace/python/tools/binary_codegen.py --output_path=mace/codegen/tuning/tuning_params.cc - bazel build mace/core:core diff --git a/docs/getting_started/create_a_model_deployment.rst b/docs/getting_started/create_a_model_deployment.rst index bf47aed5..d949c7b7 100644 --- a/docs/getting_started/create_a_model_deployment.rst +++ b/docs/getting_started/create_a_model_deployment.rst @@ -46,14 +46,14 @@ Configurations - The SHA256 checksum of the model file * - weight_sha256_checksum - The SHA256 checksum of the weight file, used by Caffe model - * - input_nodes - - The input node names, one or more strings - * - output_nodes - - The output node names, one or more strings + * - input_tensors + - The input tensor names (tensorflow), top name of inputs' layer (caffe). one or more strings + * - output_tensors + - The output tensor names (tensorflow), top name of outputs' layer (caffe). one or more strings * - input_shapes - - The shapes of the input nodes, in NHWC order + - The shapes of the input tensors, in NHWC order * - output_shapes - - The shapes of the output nodes, in NHWC order + - The shapes of the output tensors, in NHWC order * - runtime - The running device, one of CPU, GPU or DSP * - limit_opencl_kernel_time diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index 43178cf9..ea9da202 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -126,6 +126,7 @@ Tool = 0, "Failed to open model data file ", - model_data_file, ", error code: ", errno); + model_data_file, ", error code: ", strerror(errno)); const unsigned char *model_data = static_cast( mmap(nullptr, data_size, PROT_READ, MAP_PRIVATE, fd, 0)); MACE_CHECK(model_data != MAP_FAILED, "Failed to map model data file ", - model_data_file, ", error code: ", errno); + model_data_file, ", error code: ", strerror(errno)); int ret = close(fd); MACE_CHECK(ret == 0, "Failed to close model data file ", - model_data_file, ", error code: ", errno); + model_data_file, ", error code: ", strerror(errno)); return model_data; } @@ -302,7 +302,8 @@ void UnloadModelData(const unsigned char *model_data, const size_t &data_size) { int ret = munmap(const_cast(model_data), data_size); - MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno); + MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", + strerror(errno)); } MaceStatus CreateMaceEngineFromProto( diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 4abcbcfe..47e09450 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -215,6 +215,7 @@ static MaceStatus ConcatN(cl::Kernel *kernel, (*kernel_error)->UnMap(); } if (runtime->is_profiling_enabled()) { + event.wait(); CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); call_stats.start_micros = @@ -223,8 +224,7 @@ static MaceStatus ConcatN(cl::Kernel *kernel, } } if (future != nullptr) { - future->wait_fn = [runtime, event, call_stats](CallStats *stats) { - event.wait(); + future->wait_fn = [runtime, call_stats](CallStats *stats) { if (stats != nullptr) { stats->start_micros = call_stats.start_micros; stats->end_micros = stats->start_micros + call_stats.end_micros; diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index c14aac1e..fff04eb1 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -209,8 +209,9 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) { std::vector Default3DLocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); - uint32_t base = cache_size / kBaseGPUMemCacheSize; + uint64_t cache_size = + OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint32_t base = std::max(cache_size / kBaseGPUMemCacheSize, 1); lws[1] = std::min(gws[1], kwg_size); lws[2] = std::min(std::min(gws[2], base), kwg_size / lws[1]); diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 64d69693..f865091f 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -115,6 +115,7 @@ MaceStatus SliceFunctor::operator()( kernel_error_->UnMap(); } if (runtime->is_profiling_enabled()) { + event.wait(); CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); call_stats.start_micros = @@ -123,8 +124,7 @@ MaceStatus SliceFunctor::operator()( } } if (future != nullptr) { - future->wait_fn = [runtime, event, call_stats](CallStats *stats) { - event.wait(); + future->wait_fn = [runtime, call_stats](CallStats *stats) { if (stats != nullptr) { stats->start_micros = call_stats.start_micros; stats->end_micros = stats->start_micros + call_stats.end_micros; diff --git a/mace/mace.bzl b/mace/mace.bzl index 3db0ff5c..9e81ee40 100644 --- a/mace/mace.bzl +++ b/mace/mace.bzl @@ -24,18 +24,6 @@ def if_android_arm64(a): "//conditions:default": [], }) -def if_production_mode(a): - return select({ - "//mace:production_mode": a, - "//conditions:default": [], - }) - -def if_not_production_mode(a): - return select({ - "//mace:production_mode": [], - "//conditions:default": a, - }) - def if_neon_enabled(a): return select({ "//mace:neon_enabled": a, diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index 8230044b..bcbe98e0 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -3,7 +3,6 @@ py_library( srcs = [ "convert_util.py", "graph_util.py", - "tensor_util.py", "tf_dsp_converter_lib.py", "converter_tool/base_converter.py", "converter_tool/shape_inference.py", @@ -20,9 +19,9 @@ py_library( ) py_library( - name = "source_converter_lib", + name = "model_saver_lib", srcs = [ - "source_converter_lib.py", + "model_saver.py", ], srcs_version = "PY2AND3", deps = [ @@ -45,7 +44,7 @@ py_binary( srcs_version = "PY2AND3", deps = [ ":converter_lib", - ":source_converter_lib", + ":model_saver_lib", "@six_archive//:six", ], ) diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index fe378b14..86392e5d 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -21,8 +21,7 @@ import copy from mace.proto import mace_pb2 from mace.python.tools import tf_dsp_converter_lib from mace.python.tools import memory_optimizer -from mace.python.tools import source_converter_lib -from mace.python.tools import tensor_util +from mace.python.tools import model_saver from mace.python.tools.converter_tool import base_converter as cvt from mace.python.tools.converter_tool import tensorflow_converter from mace.python.tools.converter_tool import caffe_converter @@ -42,6 +41,20 @@ device_type_map = {'cpu': cvt.DeviceType.CPU.value, 'dsp': cvt.DeviceType.HEXAGON.value} +def parse_data_type(data_type, device_type): + if device_type == cvt.DeviceType.GPU.value: + if data_type == 'fp32_fp32': + return mace_pb2.DT_FLOAT + else: + return mace_pb2.DT_HALF + elif device_type == cvt.DeviceType.CPU.value: + return mace_pb2.DT_FLOAT + elif device_type == cvt.DeviceType.HEXAGON.value: + return mace_pb2.DT_UINT8 + else: + print("Invalid device type: " + device_type) + + def file_checksum(fname): hash_func = hashlib.sha256() with open(fname, "rb") as f: @@ -82,7 +95,7 @@ def main(unused_args): if FLAGS.platform not in ['tensorflow', 'caffe']: print ("platform %s is not supported." % FLAGS.platform) sys.exit(-1) - if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', '']: + if FLAGS.runtime not in ['cpu', 'gpu', 'dsp', 'cpu+gpu']: print ("runtime %s is not supported." % FLAGS.runtime) sys.exit(-1) @@ -114,7 +127,6 @@ def main(unused_args): output_node.name = output_node_names[i] option.add_output_node(output_node) - print("Convert model to mace model.") if FLAGS.platform == 'tensorflow': converter = tensorflow_converter.TensorflowConverter( option, FLAGS.model_file) @@ -122,24 +134,18 @@ def main(unused_args): converter = caffe_converter.CaffeConverter(option, FLAGS.model_file, FLAGS.weight_file) + else: + print("Mace do not support platorm %s yet." & FLAGS.platform) + exit(1) output_graph_def = converter.run() - if FLAGS.gpu_data_type == 'half': - gpu_data_type = mace_pb2.DT_HALF - else: - gpu_data_type = mace_pb2.DT_FLOAT - device_data_type_map = { - cvt.DeviceType.CPU.value: mace_pb2.DT_FLOAT, - cvt.DeviceType.GPU.value: gpu_data_type, - cvt.DeviceType.HEXAGON.value: mace_pb2.DT_UINT8 - } - print("Transform model to one that can better run on device") - if not FLAGS.runtime: + if FLAGS.runtime == 'cpu+gpu': cpu_graph_def = copy.deepcopy(output_graph_def) option.device = cvt.DeviceType.CPU.value - option.data_type = device_data_type_map[cvt.DeviceType.CPU.value] + option.data_type = parse_data_type( + FLAGS.data_type, cvt.DeviceType.CPU.value) option.disable_transpose_filters() mace_cpu_transformer = transformer.Transformer( option, cpu_graph_def) @@ -149,7 +155,8 @@ def main(unused_args): print "CPU memory optimization done." option.device = cvt.DeviceType.GPU.value - option.data_type = device_data_type_map[cvt.DeviceType.GPU.value] + option.data_type = parse_data_type( + FLAGS.data_type, cvt.DeviceType.GPU.value) option.enable_transpose_filters() mace_gpu_transformer = transformer.Transformer( option, output_graph_def) @@ -165,7 +172,8 @@ def main(unused_args): print "Merge done" else: option.device = device_type_map[FLAGS.runtime] - option.data_type = device_data_type_map[option.device] + option.data_type = parse_data_type( + FLAGS.data_type, option.device) mace_transformer = transformer.Transformer( option, output_graph_def) output_graph_def = mace_transformer.run() @@ -180,36 +188,13 @@ def main(unused_args): print "Memory optimization done." - if FLAGS.obfuscate: - tensor_util.obfuscate_name(output_graph_def) - else: - tensor_util.rename_tensor(output_graph_def) - - tensor_infos, model_data = tensor_util.get_tensor_info_and_model_data( - output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) - - source_converter_lib.convert_to_source( - output_graph_def, model_checksum, weight_checksum, FLAGS.template, - FLAGS.obfuscate, FLAGS.model_tag, FLAGS.codegen_output, - FLAGS.runtime, FLAGS.embed_model_data, FLAGS.winograd, - FLAGS.model_load_type, tensor_infos, model_data) - - if not FLAGS.embed_model_data: - output_dir = os.path.dirname(FLAGS.codegen_output) + '/' - with open(output_dir + FLAGS.model_tag + '.data', "wb") as f: - f.write(bytearray(model_data)) - - if FLAGS.model_load_type == 'pb': - tensor_util.del_tensor_data( - output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) - tensor_util.update_tensor_data_type( - output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) - with open(FLAGS.pb_output, "wb") as f: - f.write(output_graph_def.SerializeToString()) - # with open(FLAGS.pb_output + '_txt', "wb") as f: - # # output_graph_def.ClearField('tensors') - # f.write(str(output_graph_def)) - print("Model conversion is completed.") + model_saver.save_model( + output_graph_def, model_checksum, weight_checksum, + FLAGS.template_dir, FLAGS.obfuscate, FLAGS.model_tag, + FLAGS.output_dir, FLAGS.runtime, + FLAGS.embed_model_data, + FLAGS.winograd, FLAGS.data_type, + FLAGS.model_build_type) def str2bool(v): @@ -244,15 +229,10 @@ def parse_args(): default="", help="Weight file sha256 checksum") parser.add_argument( - "--codegen_output", + "--output_dir", type=str, default="", help="File to save the output graph to.") - parser.add_argument( - "--pb_output", - type=str, - default="", - help="File to save the mace model to.") parser.add_argument( "--runtime", type=str, default="", help="Runtime: cpu/gpu/dsp") parser.add_argument( @@ -263,7 +243,7 @@ def parse_args(): parser.add_argument( "--output_node", type=str, default="softmax", help="e.g., softmax") parser.add_argument( - "--template", type=str, default="", help="template path") + "--template_dir", type=str, default="", help="template path") parser.add_argument( "--obfuscate", type=str2bool, @@ -295,13 +275,16 @@ def parse_args(): default=True, help="embed model data.") parser.add_argument( - "--model_load_type", + "--model_build_type", type=str, - default="source", - help="[source|pb] Load models in generated `source` code" + - "or `pb` file.") + default="code", + help="[proto|code] build models to code" + + "or `Protobuf` file.") parser.add_argument( - "--gpu_data_type", type=str, default="half", help="half/float") + "--data_type", + type=str, + default="fp16_fp32", + help="fp16_fp32/fp32_fp32") return parser.parse_known_args() diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index 5c806f41..19674f37 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -395,12 +395,6 @@ class TensorflowConverter(base_converter.ConverterInterface): align_corners_arg.i = tf_op.get_attr(tf_align_corners) def convert_space_batch(self, tf_op): - print """You might want to try 'flatten_atrous_conv' in - transform graph to turn atrous conv2d into regular conv2d. - This may give you performance benefit on GPU. - (see https://github.com/tensorflow/tensorflow/blob/master/ - tensorflow/tools/graph_transforms/README.md#flatten_atrous_conv) - """ op = self.convert_general_op(tf_op) del op.input[1:] diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index e9952ee8..c707e424 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -52,7 +52,7 @@ class Transformer(base_converter.ConverterInterface): """ def __init__(self, option, model): - # DO NOT reorder the following transformers + # DO NOT reorder the following transformers' order self._registered_transformers_order = [ TransformerRule.REMOVE_USELESS_RESHAPE_OP, TransformerRule.REMOVE_IDENTITY_OP, @@ -940,8 +940,9 @@ class Transformer(base_converter.ConverterInterface): op_def.type = MaceKeyword.mace_image_to_buffer op_def.input.extend([output_node.name]) op_def.output.extend([output_name]) - output_shape = op_def.output_shape.add() - output_shape.dims.extend(output_node.shape) + if output_node.shape: + output_shape = op_def.output_shape.add() + output_shape.dims.extend(output_node.shape) arg = op_def.arg.add() arg.name = MaceKeyword.mace_buffer_type diff --git a/mace/python/tools/encrypt_opencl_codegen.py b/mace/python/tools/encrypt_opencl_codegen.py index 1c227ab9..e5fb93c9 100644 --- a/mace/python/tools/encrypt_opencl_codegen.py +++ b/mace/python/tools/encrypt_opencl_codegen.py @@ -73,8 +73,6 @@ def encrypt_opencl_codegen(cl_kernel_dir, output_path): with open(output_path, "w") as w_file: w_file.write(cpp_cl_encrypted_kernel) - print("Generate encrypted opencl source done!") - def parse_args(): """Parses command line arguments.""" diff --git a/mace/python/tools/mace_engine_factory.h.jinja2 b/mace/python/tools/mace_engine_factory.h.jinja2 index b1c7879a..41df3fc6 100644 --- a/mace/python/tools/mace_engine_factory.h.jinja2 +++ b/mace/python/tools/mace_engine_factory.h.jinja2 @@ -25,7 +25,7 @@ namespace mace { -{% if model_type == 'source' %} +{% if model_type == 'code' %} {% for tag in model_tags %} namespace {{tag}} { diff --git a/mace/python/tools/mace_engine_factory_codegen.py b/mace/python/tools/mace_engine_factory_codegen.py index c744069a..f5ecafd0 100644 --- a/mace/python/tools/mace_engine_factory_codegen.py +++ b/mace/python/tools/mace_engine_factory_codegen.py @@ -25,7 +25,6 @@ def gen_mace_engine_factory(model_tags, template_dir, model_type, output_dir): j2_env = Environment( loader=FileSystemLoader(template_dir), trim_blocks=True) # generate mace_run BUILD file - print model_tags template_name = 'mace_engine_factory.h.jinja2' source = j2_env.get_template(template_name).render( model_tags=model_tags, diff --git a/mace/python/tools/model_saver.py b/mace/python/tools/model_saver.py new file mode 100644 index 00000000..f41a9479 --- /dev/null +++ b/mace/python/tools/model_saver.py @@ -0,0 +1,315 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import os +import uuid +import numpy as np +import hashlib +from enum import Enum + +from mace.proto import mace_pb2 +from jinja2 import Environment, FileSystemLoader + +GENERATED_NAME = set() + +GPUDataTypeStrs = [ + "fp16_fp32", + "fp32_fp32", +] + +GPUDataType = \ + Enum('GPUDataType', [(ele, ele) for ele in GPUDataTypeStrs], type=str) + + +def generate_obfuscated_name(namespace, name): + md5 = hashlib.md5() + md5.update(namespace) + md5.update(name) + md5_digest = md5.hexdigest() + + name = md5_digest[:8] + while name in GENERATED_NAME: + name = md5_digest + assert name not in GENERATED_NAME + GENERATED_NAME.add(name) + return name + + +def generate_tensor_map(tensors): + tensor_map = {} + for t in tensors: + if t.name not in tensor_map: + tensor_map[t.name] = generate_obfuscated_name("tensor", t.name) + return tensor_map + + +def generate_in_out_map(ops, tensor_map): + in_out_map = {} + for op in ops: + op.name = generate_obfuscated_name("op", op.name) + for input_name in op.input: + if input_name not in in_out_map: + if input_name in tensor_map: + in_out_map[input_name] = tensor_map[input_name] + else: + in_out_map[input_name] = generate_obfuscated_name( + "in", input_name) + for output_name in op.output: + if output_name not in in_out_map: + if output_name in tensor_map: + in_out_map[output_name] = tensor_map[output_name] + else: + in_out_map[output_name] = generate_obfuscated_name( + "out", output_name) + return in_out_map + + +def obfuscate_name(net_def): + input_node = "mace_input_node" + output_node = "mace_output_node" + tensor_map = generate_tensor_map(net_def.tensors) + in_out_map = generate_in_out_map(net_def.op, tensor_map) + for t in net_def.tensors: + if input_node not in t.name and output_node not in t.name: + t.name = tensor_map[t.name] + for op in net_def.op: + for i in range(len(op.input)): + if input_node not in op.input[i]: + op.input[i] = in_out_map[op.input[i]] + for i in range(len(op.output)): + if output_node not in op.output[i]: + op.output[i] = in_out_map[op.output[i]] + + +def normalize_op_name(op_name): + idx = op_name.rfind(':') + if idx == -1: + return op_name + else: + return op_name[:idx] + + +def rename_tensor(net_def): + tensor_map = {} + for t in net_def.tensors: + if t.name not in tensor_map: + tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/", + "_") + t.name = tensor_map[t.name] + for op in net_def.op: + for i in range(len(op.input)): + if op.input[i] in tensor_map: + op.input[i] = tensor_map[op.input[i]] + for i in range(len(op.output)): + if op.output[i] in tensor_map: + op.output[i] = tensor_map[op.output[i]] + + +def stringfy(value): + return ', '.join('"{0}"'.format(w) for w in value) + + +class TensorInfo: + def __init__(self, id, tensor): + self.id = id + self.data_type = tensor.data_type + if tensor.data_type == mace_pb2.DT_HALF: + self.data_type = mace_pb2.DT_HALF + self.data = bytearray( + np.array(tensor.float_data).astype(np.float16).tobytes()) + elif tensor.data_type == mace_pb2.DT_FLOAT: + self.data_type = mace_pb2.DT_FLOAT + self.data = bytearray( + np.array(tensor.float_data).astype(np.float32).tobytes()) + elif tensor.data_type == mace_pb2.DT_INT32: + self.data = bytearray( + np.array(tensor.int32_data).astype(np.int32).tobytes()) + elif tensor.data_type == mace_pb2.DT_UINT8: + self.data = bytearray( + np.array(tensor.int32_data).astype(np.uint8).tolist()) + else: + raise Exception('Tensor data type %s not supported' % + tensor.data_type) + + +def update_tensor_infos(net_def, runtime, data_type): + offset = 0 + counter = 0 + tensor_infos = [] + for tensor in net_def.tensors: + # update data_type + if tensor.data_type == mace_pb2.DT_FLOAT and runtime == 'gpu' \ + and data_type == GPUDataType.fp16_fp32: + tensor.data_type = mace_pb2.DT_HALF + + # Add offset and data_size + tensor_info = TensorInfo(counter, tensor) + tensor_infos.append(tensor_info) + # align + if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: + padding = 4 - offset % 4 + offset += padding + + if tensor.data_type == mace_pb2.DT_FLOAT \ + or tensor.data_type == mace_pb2.DT_HALF: + tensor.data_size = len(tensor.float_data) + elif tensor.data_type == mace_pb2.DT_INT32: + tensor.data_size = len(tensor.int32_data) + elif tensor.data_type == mace_pb2.DT_UINT8: + tensor.data_size = len(tensor.int32_data) + tensor.offset = offset + offset += len(tensor_info.data) + counter += 1 + + +def extract_model_data(net_def): + model_data = [] + offset = 0 + counter = 0 + for tensor in net_def.tensors: + tensor_info = TensorInfo(counter, tensor) + # align + if tensor_info.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0: + padding = 4 - offset % 4 + model_data.extend(bytearray([0] * padding)) + offset += padding + model_data.extend(tensor_info.data) + offset += len(tensor_info.data) + counter += 1 + return model_data + + +def save_model_data(net_def, model_tag, output_dir): + model_data = extract_model_data(net_def) + # generate tensor data + with open(output_dir + model_tag + '.data', "wb") as f: + f.write(bytearray(model_data)) + + +def save_model_to_proto(net_def, model_tag, output_dir): + for tensor in net_def.tensors: + if tensor.data_type == mace_pb2.DT_FLOAT \ + or tensor.data_type == mace_pb2.DT_HALF: + del tensor.float_data[:] + elif tensor.data_type == mace_pb2.DT_INT32: + del tensor.int32_data[:] + elif tensor.data_type == mace_pb2.DT_UINT8: + del tensor.int32_data[:] + proto_file_path = output_dir + model_tag + '.pb' + with open(proto_file_path, "wb") as f: + f.write(net_def.SerializeToString()) + with open(proto_file_path + '_txt', "wb") as f: + f.write(str(net_def)) + + +def save_model_to_code(net_def, model_tag, runtime, + template_dir, output_dir, embed_model_data, + model_checksum, weight_checksum, + obfuscate, winograd_conv): + # Create the jinja2 environment. + j2_env = Environment( + loader=FileSystemLoader(template_dir), trim_blocks=True) + j2_env.filters['stringfy'] = stringfy + + # generate tensor source files + template_name = 'tensor_source.jinja2' + + counter = 0 + for tensor in net_def.tensors: + tensor_info = TensorInfo(counter, tensor) + # convert tensor + source = j2_env.get_template(template_name).render( + tensor_info=tensor_info, + tensor=tensor, + tag=model_tag, + ) + with open(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f: + f.write(source) + counter += 1 + + # generate tensor data + model_data = extract_model_data(net_def) + template_name = 'tensor_data.jinja2' + source = j2_env.get_template(template_name).render( + tag=model_tag, + embed_model_data=embed_model_data, + model_data_size=len(model_data), + model_data=model_data) + with open(output_dir + 'tensor_data' + '.cc', "wb") as f: + f.write(source) + + # generate op source files + template_name = 'operator.jinja2' + counter = 0 + op_size = len(net_def.op) + for start in range(0, op_size, 10): + source = j2_env.get_template(template_name).render( + start=start, + end=min(start + 10, op_size), + net=net_def, + tag=model_tag, + runtime=runtime, + ) + with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f: + f.write(source) + counter += 1 + + # generate model source files + build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + template_name = 'model.jinja2' + checksum = model_checksum + if weight_checksum is not None: + checksum = "{},{}".format(model_checksum, weight_checksum) + source = j2_env.get_template(template_name).render( + net=net_def, + tag=model_tag, + runtime=runtime, + obfuscate=obfuscate, + embed_model_data=embed_model_data, + winograd_conv=winograd_conv, + checksum=checksum, + build_time=build_time) + with open(output_dir + 'model.cc', "wb") as f: + f.write(source) + + # generate model header file + template_name = 'model_header.jinja2' + source = j2_env.get_template(template_name).render(tag=model_tag, ) + with open(output_dir + model_tag + '.h', "wb") as f: + f.write(source) + + +def save_model(net_def, model_checksum, weight_checksum, template_dir, + obfuscate, model_tag, output_dir, runtime, embed_model_data, + winograd_conv, data_type, model_build_type): + if obfuscate: + obfuscate_name(net_def) + else: + rename_tensor(net_def) + + output_dir = output_dir + '/' + # update tensor type + update_tensor_infos(net_def, runtime, data_type) + + if model_build_type == 'proto' or not embed_model_data: + save_model_data(net_def, model_tag, output_dir) + + if model_build_type == 'proto': + save_model_to_proto(net_def, model_tag, output_dir) + else: + save_model_to_code(net_def, model_tag, runtime, + template_dir, output_dir, embed_model_data, + model_checksum, weight_checksum, + obfuscate, winograd_conv) diff --git a/mace/python/tools/operator.jinja2 b/mace/python/tools/operator.jinja2 index 8fdf0b2c..6f682256 100644 --- a/mace/python/tools/operator.jinja2 +++ b/mace/python/tools/operator.jinja2 @@ -94,10 +94,11 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { {% endfor %} + {% if net.op[i].output_shape|length > 0 %} op->mutable_output_shape()->Reserve({{ net.op[i].output_shape|length }}); mace::OutputShape * output_shape = nullptr; {% for shape in net.op[i].output_shape %} - {% if shape.dims|length > 0 %} + {% if shape.dims|length > 0 %} output_shape = op->add_output_shape(); output_shape->mutable_dims()->Reserve({{ shape.dims|length }}); @@ -105,8 +106,9 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { output_shape->add_dims({{ dim }}); {% endfor %} - {% endif %} + {% endif %} {% endfor %} + {% endif %} std::vector output_types_int({ {{ net.op[i].output_type | join(', ') }} }); std::vector output_types({{ net.op[i].output_type | length }}); diff --git a/mace/python/tools/source_converter_lib.py b/mace/python/tools/source_converter_lib.py deleted file mode 100644 index 145d1d41..00000000 --- a/mace/python/tools/source_converter_lib.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2018 Xiaomi, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import os - -from mace.proto import mace_pb2 -from jinja2 import Environment, FileSystemLoader - - -def stringfy(value): - return ', '.join('"{0}"'.format(w) for w in value) - - -def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, - obfuscate, model_tag, output, runtime, embed_model_data, - winograd_conv, model_load_type, tensor_infos, - model_data): - # Capture our current directory - print template_dir - - # Create the jinja2 environment. - j2_env = Environment( - loader=FileSystemLoader(template_dir), trim_blocks=True) - j2_env.filters['stringfy'] = stringfy - output_dir = os.path.dirname(output) + '/' - # generate tensor source files - template_name = 'tensor_source.jinja2' - for i in range(len(net_def.tensors)): - if model_load_type == 'source': - source = j2_env.get_template(template_name).render( - tensor_info=tensor_infos[i], - tensor=net_def.tensors[i], - tag=model_tag, - ) - with open(output_dir + 'tensor' + str(i) + '.cc', "wb") as f: - f.write(source) - - if model_load_type == 'source': - # generate tensor data - template_name = 'tensor_data.jinja2' - source = j2_env.get_template(template_name).render( - tag=model_tag, - embed_model_data=embed_model_data, - model_data_size=len(model_data), - model_data=model_data) - with open(output_dir + 'tensor_data' + '.cc', "wb") as f: - f.write(source) - - # generate op source files - template_name = 'operator.jinja2' - counter = 0 - op_size = len(net_def.op) - for start in range(0, op_size, 10): - source = j2_env.get_template(template_name).render( - start=start, - end=min(start + 10, op_size), - net=net_def, - tag=model_tag, - runtime=runtime, - ) - with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f: - f.write(source) - counter += 1 - - # generate model source files - build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - template_name = 'model.jinja2' - checksum = model_checksum - if weight_checksum is not None: - checksum = "{},{}".format(model_checksum, weight_checksum) - source = j2_env.get_template(template_name).render( - net=net_def, - tag=model_tag, - runtime=runtime, - obfuscate=obfuscate, - embed_model_data=embed_model_data, - winograd_conv=winograd_conv, - checksum=checksum, - build_time=build_time) - with open(output, "wb") as f: - f.write(source) - - # generate model header file - template_name = 'model_header.jinja2' - source = j2_env.get_template(template_name).render(tag=model_tag, ) - with open(output_dir + model_tag + '.h', "wb") as f: - f.write(source) diff --git a/mace/python/tools/tensor_util.py b/mace/python/tools/tensor_util.py deleted file mode 100644 index 61a5e90c..00000000 --- a/mace/python/tools/tensor_util.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2018 Xiaomi, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import hashlib -import numpy as np - -from mace.proto import mace_pb2 - -GENERATED_NAME = set() - - -def generate_obfuscated_name(namespace, name): - md5 = hashlib.md5() - md5.update(namespace) - md5.update(name) - md5_digest = md5.hexdigest() - - name = md5_digest[:8] - while name in GENERATED_NAME: - name = md5_digest - assert name not in GENERATED_NAME - GENERATED_NAME.add(name) - return name - - -def generate_tensor_map(tensors): - tensor_map = {} - for t in tensors: - if t.name not in tensor_map: - tensor_map[t.name] = generate_obfuscated_name("tensor", t.name) - return tensor_map - - -def generate_in_out_map(ops, tensor_map): - in_out_map = {} - for op in ops: - op.name = generate_obfuscated_name("op", op.name) - for input_name in op.input: - if input_name not in in_out_map: - if input_name in tensor_map: - in_out_map[input_name] = tensor_map[input_name] - else: - in_out_map[input_name] = generate_obfuscated_name( - "in", input_name) - for output_name in op.output: - if output_name not in in_out_map: - if output_name in tensor_map: - in_out_map[output_name] = tensor_map[output_name] - else: - in_out_map[output_name] = generate_obfuscated_name( - "out", output_name) - return in_out_map - - -def obfuscate_name(net_def): - input_node = "mace_input_node" - output_node = "mace_output_node" - tensor_map = generate_tensor_map(net_def.tensors) - in_out_map = generate_in_out_map(net_def.op, tensor_map) - for t in net_def.tensors: - if input_node not in t.name and output_node not in t.name: - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if input_node not in op.input[i]: - op.input[i] = in_out_map[op.input[i]] - for i in range(len(op.output)): - if output_node not in op.output[i]: - op.output[i] = in_out_map[op.output[i]] - - -def normalize_op_name(op_name): - idx = op_name.rfind(':') - if idx == -1: - return op_name - else: - return op_name[:idx] - - -def rename_tensor(net_def): - tensor_map = {} - for t in net_def.tensors: - if t.name not in tensor_map: - tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/", - "_") - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if op.input[i] in tensor_map: - op.input[i] = tensor_map[op.input[i]] - for i in range(len(op.output)): - if op.output[i] in tensor_map: - op.output[i] = tensor_map[op.output[i]] - - -class TensorInfo: - def __init__(self, id, t, runtime, gpu_data_type): - self.id = id - self.data_type = mace_pb2.DataType.Name(t.data_type) - if t.data_type == mace_pb2.DT_FLOAT: - if runtime == 'gpu' and gpu_data_type == 'half': - self.data_type = mace_pb2.DT_HALF - self.data = bytearray( - np.array(t.float_data).astype(np.float16).tobytes()) - else: - self.data_type = mace_pb2.DT_FLOAT - self.data = bytearray( - np.array(t.float_data).astype(np.float32).tobytes()) - elif t.data_type == mace_pb2.DT_INT32: - self.data = bytearray( - np.array(t.int32_data).astype(np.int32).tobytes()) - elif t.data_type == mace_pb2.DT_UINT8: - self.data = bytearray( - np.array(t.int32_data).astype(np.uint8).tolist()) - else: - raise Exception('Tensor data type %s not supported' % t.data_type) - - -def get_tensor_info_and_model_data(net_def, runtime, gpu_data_type): - model_data = [] - offset = 0 - counter = 0 - tensor_infos = [] - for t in net_def.tensors: - tensor_info = TensorInfo(counter, t, runtime, gpu_data_type) - tensor_infos.append(tensor_info) - # align - if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: - padding = 4 - offset % 4 - model_data.extend(bytearray([0] * padding)) - offset += padding - - if t.data_type == mace_pb2.DT_FLOAT: - t.data_size = len(t.float_data) - elif t.data_type == mace_pb2.DT_INT32: - t.data_size = len(t.int32_data) - elif t.data_type == mace_pb2.DT_UINT8: - t.data_size = len(t.int32_data) - t.offset = offset - - counter += 1 - model_data.extend(tensor_info.data) - offset += len(tensor_info.data) - - return tensor_infos, model_data - - -def del_tensor_data(net_def, runtime, gpu_data_type): - for t in net_def.tensors: - if t.data_type == mace_pb2.DT_FLOAT: - del t.float_data[:] - elif t.data_type == mace_pb2.DT_INT32: - del t.int32_data[:] - elif t.data_type == mace_pb2.DT_UINT8: - del t.int32_data[:] - - -def update_tensor_data_type(net_def, runtime, gpu_data_type): - for t in net_def.tensors: - if t.data_type == mace_pb2.DT_FLOAT and runtime == 'gpu' \ - and gpu_data_type == 'half': - t.data_type = mace_pb2.DT_HALF diff --git a/mace/utils/BUILD b/mace/utils/BUILD index b1d62e0d..69794347 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -31,28 +31,6 @@ cc_library( copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/public", - ], -) - -cc_library( - name = "utils_dev", - srcs = [ - "tuner_development.cc", - ], - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], - deps = [ - ":utils", - ], -) - -cc_library( - name = "utils_prod", - srcs = [ - "tuner_production.cc", - ], - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], - deps = [ - ":utils", "//mace/codegen:generated_tuning_params", ], ) @@ -70,7 +48,7 @@ cc_test( ]), linkstatic = 1, deps = [ - ":utils_dev", + ":utils", "@gtest//:gtest", "@gtest//:gtest_main", ], diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 21522587..e2a83aac 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -29,10 +29,6 @@ namespace mace { -extern bool GetTuningParams( - const char *path, - std::unordered_map> *param_table); - template class Tuner { public: @@ -74,9 +70,6 @@ class Tuner { : ""); return func(param_table_[obfucated_param_key], nullptr, nullptr); } else { -#ifndef MACE_DISABLE_NO_TUNING_WARNING - LOG(WARNING) << "Fallback to default parameter: " << param_key; -#endif return func(default_param, nullptr, nullptr); } } @@ -124,9 +117,16 @@ class Tuner { } inline void ReadRunParamters() { - bool success = GetTuningParams(path_, ¶m_table_); - if (!success) { - LOG(WARNING) << "Get run parameter failed."; + extern const std::map> + kTuningParamsData; + if (!kTuningParamsData.empty()) { + for (auto it = kTuningParamsData.begin(); it != kTuningParamsData.end(); + ++it) { + param_table_.emplace(it->first, std::vector( + it->second.begin(), it->second.end())); + } + } else { + LOG(INFO) << "There is no tuned parameters."; } } diff --git a/tools/bazel.rc b/tools/bazel.rc new file mode 100644 index 00000000..9348ee1e --- /dev/null +++ b/tools/bazel.rc @@ -0,0 +1,57 @@ +# Partially borrowed from tensorflow tools/bazel.rc + +# By default, we don't distinct target and host platfroms. +# When doing cross compilation, use --config=cross_compile to distinct them. +build --distinct_host_configuration=false +build:cross_compile --distinct_host_configuration=true + +build --verbose_failures +build --copt=-std=c++11 +build --copt=-D_GLIBCXX_USE_C99_MATH_TR1 +build --copt=-DMACE_OBFUSCATE_LITERALS + +# Usage example: bazel build --config android +build:android --crosstool_top=//external:android/crosstool +build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:android --config=cross_compile + +# Usage example: bazel build --config optimization +build:optimization -c opt +build:optimization --copt=-O3 +build:optimization --strip=always + +# Address sanitizer +build:asan --strip=never +build:asan --copt -fsanitize=address +build:asan --copt -D_FORTIFY_SOURCE +build:asan --copt -DADDRESS_SANITIZER +build:asan --copt -O0 +build:asan --copt -g +build:asan --copt -fno-omit-frame-pointer +build:asan --linkopt -fsanitize=address + +# Thread sanitizer +build:tsan --strip=never +build:tsan --copt -fsanitize=thread +build:tsan --copt -DTHREAD_SANITIZER +build:tsan --copt -DDYNAMIC_ANNOTATIONS_ENABLED=1 +build:tsan --copt -DDYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1 +build:tsan --copt -O0 +build:tsan --copt -fno-omit-frame-pointer +build:tsan --linkopt -fsanitize=thread + +# Memory sanitizer +build:msan --strip=never +build:msan --copt -fsanitize=memory +build:msan --copt -DADDRESS_SANITIZER +build:msan --copt -O0 +build:msan --copt -fno-omit-frame-pointer +build:msan --linkopt -fsanitize=memory + +# Undefined Behavior Sanitizer +build:ubsan --strip=never +build:ubsan --copt -fsanitize=undefined +build:ubsan --copt -O0 +build:ubsan --copt -fno-omit-frame-pointer +build:ubsan --linkopt -fsanitize=undefined +build:ubsan --linkopt -lubsan diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index 84cae554..c67c2a85 100644 --- a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -95,21 +95,6 @@ def parse_args(): type=str2bool, default=False, help="Whether to run the target") - parser.add_argument( - "--valgrind", - type=bool, - default=False, - help="Whether to use valgrind to check memory error.") - parser.add_argument( - "--valgrind_path", - type=str, - default="/data/local/tmp/valgrind", - help="Valgrind install path.") - parser.add_argument( - "--valgrind_args", - type=str, - default="", - help="Valgrind command args.") parser.add_argument("--args", type=str, default="", help="Command args") parser.add_argument( "--stdout_processor", @@ -121,6 +106,10 @@ def parse_args(): type=str2bool, default=True, help="Whether to use neon optimization") + parser.add_argument( + '--address_sanitizer', + action="store_true", + help="Whether to enable AddressSanitizer") return parser.parse_known_args() @@ -145,16 +134,17 @@ def main(unused_args): sh_commands.gen_encrypted_opencl_source() sh_commands.gen_compiled_opencl_source() sh_commands.gen_mace_version() + sh_commands.gen_tuning_param_code([]) strip = "always" debug = False - if FLAGS.valgrind: + if FLAGS.address_sanitizer: strip = "never" debug = True for target_abi in target_abis: - sh_commands.bazel_build(target, strip=strip, abi=target_abi, - disable_no_tuning_warning=True, debug=debug, - enable_neon=FLAGS.enable_neon) + sh_commands.bazel_build(target, abi=target_abi, + enable_neon=FLAGS.enable_neon, + address_sanitizer=FLAGS.address_sanitizer) if FLAGS.run_target: for serialno in target_devices: if target_abi not in set( @@ -162,28 +152,17 @@ def main(unused_args): print("Skip device %s which does not support ABI %s" % (serialno, target_abi)) continue - if FLAGS.valgrind: - stdouts = sh_commands.adb_run_valgrind( - serialno, - host_bin_path, - bin_name, - valgrind_path=FLAGS.valgrind_path, - valgrind_args=FLAGS.valgrind_args, - args=FLAGS.args, - opencl_profiling=1, - vlog_level=0, - device_bin_path="/data/local/tmp/mace", - out_of_range_check=1) - else: - stdouts = sh_commands.adb_run( - serialno, - host_bin_path, - bin_name, - args=FLAGS.args, - opencl_profiling=1, - vlog_level=0, - device_bin_path="/data/local/tmp/mace", - out_of_range_check=1) + stdouts = sh_commands.adb_run( + target_abi, + serialno, + host_bin_path, + bin_name, + args=FLAGS.args, + opencl_profiling=1, + vlog_level=0, + device_bin_path="/data/local/tmp/mace", + out_of_range_check=1, + address_sanitizer=FLAGS.address_sanitizer) device_properties = sh_commands.adb_getprop_by_serialno( serialno) globals()[FLAGS.stdout_processor](stdouts, device_properties, diff --git a/tools/common.py b/tools/common.py index b708fc50..5e3d1149 100644 --- a/tools/common.py +++ b/tools/common.py @@ -13,23 +13,99 @@ # limitations under the License. import enum -import logging import re ################################ # log ################################ -def init_logging(): - logger = logging.getLogger('MACE') - logger.setLevel(logging.INFO) +class CMDColors: + PURPLE = '\033[95m' + BLUE = '\033[94m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - formatter = logging.Formatter( - '%(asctime)s [%(name)s] [%(levelname)s]: %(message)s') - ch.setFormatter(formatter) - logger.addHandler(ch) + +class MaceLogger: + @staticmethod + def header(message): + print CMDColors.PURPLE + message + CMDColors.ENDC + + @staticmethod + def summary(message): + print CMDColors.GREEN + message + CMDColors.ENDC + + @staticmethod + def info(message): + print message + + @staticmethod + def warning(message): + print CMDColors.YELLOW + 'WARNING:' + message + CMDColors.ENDC + + @staticmethod + def error(module, message): + print CMDColors.RED + 'ERROR: [' + module + '] '\ + + message + CMDColors.ENDC + exit(1) + + +def mace_check(condition, module, message): + if not condition: + MaceLogger.error(module, message) + + +################################ +# String Formatter +################################ +class StringFormatter: + @staticmethod + def table(header, data, title, align="R"): + data_size = len(data) + column_size = len(header) + column_length = [len(str(ele)) + 1 for ele in header] + for row_idx in range(data_size): + data_tuple = data[row_idx] + ele_size = len(data_tuple) + assert(ele_size == column_size) + for i in range(ele_size): + column_length[i] = max(column_length[i], + len(str(data_tuple[i])) + 1) + + table_column_length = sum(column_length) + column_size + 1 + dash_line = '-' * table_column_length + '\n' + header_line = '=' * table_column_length + '\n' + output = "" + output += dash_line + output += str(title).center(table_column_length) + '\n' + output += dash_line + output += '|' + '|'.join([str(header[i]).center(column_length[i]) + for i in range(column_size)]) + '|\n' + output += header_line + + for data_tuple in data: + ele_size = len(data_tuple) + row_list = [] + for i in range(ele_size): + if align == "R": + row_list.append(str(data_tuple[i]).rjust(column_length[i])) + elif align == "L": + row_list.append(str(data_tuple[i]).ljust(column_length[i])) + elif align == "C": + row_list.append(str(data_tuple[i]) + .center(column_length[i])) + output += '|' + '|'.join(row_list) + "|\n" + dash_line + return output + + @staticmethod + def block(message): + line_length = 10 + len(str(message)) + 10 + star_line = '*' * line_length + '\n' + return star_line + str(message).center(line_length) + '\n' + star_line ################################ diff --git a/tools/example.yaml b/tools/example.yaml deleted file mode 100644 index b0fda8b3..00000000 --- a/tools/example.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# example.yaml -# Each yaml file describes a exported library (could be named [target_abi]/libmace-${filename}.a), -# which can contains more than one models -# target_soc can get by `adb shell getprop | grep ro.board.platform | cut -d [ -f3 | cut -d ] -f1` -target_abis: [armeabi-v7a, arm64-v8a] -target_socs: [MSM8953] -embed_model_data: 1 -models: - preview_net: - platform: tensorflow - model_file_path: path/to/model64.pb # also support http:// and https:// - model_sha256_checksum: 05d92625809dc9edd6484882335c48c043397aed450a168d75eb8b538e86881a - input_nodes: input_node - output_nodes: output_node - input_shapes: 1,64,64,3 - output_shapes: 1,64,64,2 - runtime: gpu - limit_opencl_kernel_time: 0 - dsp_mode: 0 - obfuscate: 1 - fast_conv: 0 - validation_inputs_data: - - path/to/input_files - capture_net: - platform: caffe - model_file_path: path/to/model.prototxt - weight_file_path: path/to/weight.caffemodel - model_sha256_checksum: 05d92625809dc9edd6484882335c48c043397aed450a168d75eb8b538e86881a - weight_sha256_checksum: 05d92625809dc9edd6484882335c48c043397aed450a168d75eb8b538e86881a - input_nodes: - - input_node0 - - input_node1 - output_nodes: - - output_node0 - - output_node1 - input_shapes: - - 1,256,256,3 - - 1,128,128,3 - output_shapes: - - 1,256,256,2 - - 1,1,1,2 - runtime: cpu - limit_opencl_kernel_time: 1 - dsp_mode: 0 - obfuscate: 1 - fast_conv: 0 diff --git a/tools/mace_tools.py b/tools/mace_tools.py index f831ba51..7bafee33 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -18,7 +18,6 @@ # --mode=all import argparse -import enum import filelock import hashlib import os @@ -28,48 +27,143 @@ import sys import urllib import yaml import re +from enum import Enum -import common import sh_commands +from sh_commands import BuildType -from ConfigParser import ConfigParser +from common import CaffeEnvType +from common import mace_check +from common import MaceLogger +from common import StringFormatter - -def get_target_socs(configs): - if "host" in configs["target_abis"]: - return [""] - else: - available_socs = sh_commands.adb_get_all_socs() - target_socs = available_socs - if "target_socs" in configs: - target_socs = set(configs["target_socs"]) - target_socs = target_socs & available_socs - - if FLAGS.target_socs != "all": - socs = set(FLAGS.target_socs.split(',')) - target_socs = target_socs & socs - missing_socs = socs.difference(target_socs) - if len(missing_socs) > 0: - print( - "Error: devices with SoCs are not connected %s" % - missing_socs) - exit(1) - - if not target_socs: - print("Error: no device to run") - exit(1) - - return target_socs +################################ +# common definitions +################################ +BUILD_OUTPUT_DIR = 'build' +PHONE_DATA_DIR = "/data/local/tmp/mace_run/" +MODEL_OUTPUT_DIR_NAME = 'model' +BUILD_TMP_DIR_NAME = '_tmp' +BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' +OUTPUT_LIBRARY_DIR_NAME = 'library' + +ABITypeStrs = [ + "armeabi-v7a", + "arm64-v8a", + "host", +] +ABIType = Enum('ABIType', [(ele, ele) for ele in ABITypeStrs], type=str) + + +PlatformTypeStrs = [ + "tensorflow", + "caffe", +] +PlatformType = Enum('PlatformType', [(ele, ele) for ele in PlatformTypeStrs], + type=str) + +RuntimeTypeStrs = [ + "cpu", + "gpu", + "dsp", + "cpu+gpu" +] + + +class RuntimeType(object): + cpu = 'cpu' + gpu = 'gpu' + dsp = 'dsp' + cpu_gpu = 'cpu+gpu' + + +CPUDataTypeStrs = [ + "fp32", +] + +CPUDataType = Enum('CPUDataType', [(ele, ele) for ele in CPUDataTypeStrs], + type=str) + +GPUDataTypeStrs = [ + "fp16_fp32", + "fp32_fp32", +] + +GPUDataType = Enum('GPUDataType', [(ele, ele) for ele in GPUDataTypeStrs], + type=str) + + +class DefaultValues(object): + omp_num_threads = -1, + cpu_affinity_policy = 1, + gpu_perf_hint = 3, + gpu_priority_hint = 3, + + +class YAMLKeyword(object): + library_name = 'library_name' + target_abis = 'target_abis' + target_socs = 'target_socs' + build_type = 'build_type' + embed_model_data = 'embed_model_data' + models = 'models' + platform = 'platform' + model_file_path = 'model_file_path' + model_sha256_checksum = 'model_sha256_checksum' + weight_file_path = 'weight_file_path' + weight_sha256_checksum = 'weight_sha256_checksum' + subgraphs = 'subgraphs' + input_tensors = 'input_tensors' + input_shapes = 'input_shapes' + output_tensors = 'output_tensors' + output_shapes = 'output_shapes' + runtime = 'runtime' + data_type = 'data_type' + limit_opencl_kernel_time = 'limit_opencl_kernel_time' + nnlib_graph_mode = 'nnlib_graph_mode' + obfuscate = 'obfuscate' + winograd = 'winograd' + validation_inputs_data = 'validation_inputs_data' + + +class ModuleName(object): + YAML_CONFIG = 'YAML CONFIG' + MODEL_CONVERTER = 'Model Converter' + + +CPP_KEYWORDS = [ + 'alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel', + 'atomic_commit', 'atomic_noexcept', 'auto', 'bitand', 'bitor', + 'bool', 'break', 'case', 'catch', 'char', 'char16_t', 'char32_t', + 'class', 'compl', 'concept', 'const', 'constexpr', 'const_cast', + 'continue', 'co_await', 'co_return', 'co_yield', 'decltype', 'default', + 'delete', 'do', 'double', 'dynamic_cast', 'else', 'enum', 'explicit', + 'export', 'extern', 'false', 'float', 'for', 'friend', 'goto', 'if', + 'import', 'inline', 'int', 'long', 'module', 'mutable', 'namespace', + 'new', 'noexcept', 'not', 'not_eq', 'nullptr', 'operator', 'or', 'or_eq', + 'private', 'protected', 'public', 'register', 'reinterpret_cast', + 'requires', 'return', 'short', 'signed', 'sizeof', 'static', + 'static_assert', 'static_cast', 'struct', 'switch', 'synchronized', + 'template', 'this', 'thread_local', 'throw', 'true', 'try', 'typedef', + 'typeid', 'typename', 'union', 'unsigned', 'using', 'virtual', 'void', + 'volatile', 'wchar_t', 'while', 'xor', 'xor_eq', 'override', 'final', + 'transaction_safe', 'transaction_safe_dynamic', 'if', 'elif', 'else', + 'endif', 'defined', 'ifdef', 'ifndef', 'define', 'undef', 'include', + 'line', 'error', 'pragma', +] +################################ +# common functions +################################ def parse_device_type(runtime): device_type = "" - if runtime == "dsp": + if runtime == RuntimeType.dsp: device_type = "HEXAGON" - elif runtime == "gpu": + elif runtime == RuntimeType.gpu: device_type = "GPU" - elif runtime == "cpu": + elif runtime == RuntimeType.cpu: device_type = "CPU" return device_type @@ -81,224 +175,272 @@ def get_hexagon_mode(configs): model_runtime = configs["models"][model_name].get("runtime", "") runtime_list.append(model_runtime.lower()) - global_runtime = "" if "dsp" in runtime_list: return True return False -def gen_opencl_and_tuning_code(target_abi, - serialno, - model_output_dirs, - pull_or_not): - cl_built_kernel_file_name = "mace_cl_compiled_program.bin" - cl_platform_info_file_name = "mace_cl_platform_info.txt" - if pull_or_not: - sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, - cl_built_kernel_file_name, - cl_platform_info_file_name) - - # generate opencl binary code - sh_commands.gen_opencl_binary_code(model_output_dirs, - cl_built_kernel_file_name, - cl_platform_info_file_name) +def md5sum(str): + md5 = hashlib.md5() + md5.update(str) + return md5.hexdigest() - sh_commands.gen_tuning_param_code(model_output_dirs) +def sha256_checksum(fname): + hash_func = hashlib.sha256() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_func.update(chunk) + return hash_func.hexdigest() -def model_benchmark_stdout_processor(stdout, - abi, - serialno, - model_name, - device_type): - metrics = [0] * 3 - for line in stdout.split('\n'): - line = line.strip() - parts = line.split() - if len(parts) == 4 and parts[0].startswith("time"): - metrics[0] = str(float(parts[1])) - metrics[1] = str(float(parts[2])) - metrics[2] = str(float(parts[3])) - break - device_name = "" - target_soc = "" - if abi != "host": - props = sh_commands.adb_getprop_by_serialno(serialno) - device_name = props.get("ro.product.model", "") - target_soc = props.get("ro.board.platform", "") +def format_model_config(config_file_path): + with open(config_file_path) as f: + configs = yaml.load(f) - report_filename = FLAGS.output_dir + "/report.csv" - if not os.path.exists(report_filename): - with open(report_filename, 'w') as f: - f.write("model_name,device_name,soc,abi,runtime," - "init,warmup,run_avg\n") + library_name = configs.get(YAMLKeyword.library_name, "") + mace_check(len(library_name) > 0, + ModuleName.YAML_CONFIG, "library name shuold not be empty") + + target_abis = configs.get(YAMLKeyword.target_abis, []) + mace_check((isinstance(target_abis, list) and len(target_abis) > 0), + ModuleName.YAML_CONFIG, "target_abis list is needed") + for abi in target_abis: + mace_check(abi in ABITypeStrs, + ModuleName.YAML_CONFIG, + "target_abis must be in " + str(ABITypeStrs)) + + target_socs = configs.get(YAMLKeyword.target_socs, "") + if not target_socs: + configs[YAMLKeyword.target_socs] = [] + elif not isinstance(target_socs, list): + configs[YAMLKeyword.target_socs] = [target_socs] + + if ABIType.host not in target_abis: + available_socs = sh_commands.adb_get_all_socs() + if YAMLKeyword.target_socs in configs: + target_socs = set(configs[YAMLKeyword.target_socs]) + for soc in target_socs: + mace_check(soc in available_socs, + ModuleName.YAML_CONFIG, + "Build specified SOC library, " + "you must plug in a phone using the SOC") + + build_type = BuildType.code + build_type_str = configs.get(YAMLKeyword.build_type, "") + if build_type_str == BuildType.proto: + build_type = BuildType.proto + elif build_type_str == BuildType.code: + build_type = BuildType.code + else: + MaceLogger.error(ModuleName.YAML_CONFIG, + "Invalid build type " + build_type_str + + ". only support [proto|code] format, " + + "proto for converting model to ProtoBuf file, " + + "code for converting model to c++ code.") + + embed_model_data = configs.get(YAMLKeyword.embed_model_data, "") + if embed_model_data == "" or not isinstance(embed_model_data, int) or \ + embed_model_data < 0 or embed_model_data > 1: + MaceLogger.error(ModuleName.YAML_CONFIG, + "embed_model_data must be 0 or 1. " + "0 for embed model data to code, 1 not.") + if build_type == BuildType.proto: + configs[YAMLKeyword.embed_model_data] = 0 + + model_names = configs.get(YAMLKeyword.models, []) + mace_check(len(model_names) > 0, ModuleName.YAML_CONFIG, + "no model found in config file") + + model_name_reg = re.compile(r'^[a-z0-9_]+$') + for model_name in model_names: + # check model_name legality + mace_check(model_name not in CPP_KEYWORDS, + ModuleName.YAML_CONFIG, + "model name should not be c++ keyword.") + mace_check((model_name[0] == '_' or model_name[0].isalpha()) + and bool(model_name_reg.match(model_name)), + ModuleName.YAML_CONFIG, + "model name shuold Meet the c++ naming convention" + " which start with '_' or alpha" + " and only contain alpha, number and '_'") + + model_config = configs[YAMLKeyword.models][model_name] + platform = model_config.get(YAMLKeyword.platform, "") + mace_check(platform in PlatformTypeStrs, + ModuleName.YAML_CONFIG, + "'platform' must be in " + str(PlatformTypeStrs)) + + for key in [YAMLKeyword.model_file_path, + YAMLKeyword.model_sha256_checksum]: + value = model_config.get(key, "") + mace_check(value != "", ModuleName.YAML_CONFIG, + "'%s' is necessary" % key) + + weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") + if weight_file_path: + weight_checksum =\ + model_config.get(YAMLKeyword.weight_sha256_checksum, "") + mace_check(weight_checksum != "", ModuleName.YAML_CONFIG, + "'%s' is necessary" % + YAMLKeyword.weight_sha256_checksum) + else: + model_config[YAMLKeyword.weight_sha256_checksum] = "" + + runtime = model_config.get(YAMLKeyword.runtime, "") + mace_check(runtime in RuntimeTypeStrs, + ModuleName.YAML_CONFIG, + "'runtime' must be in " + str(RuntimeTypeStrs)) + if ABIType.host in target_abis: + mace_check(runtime == RuntimeType.cpu, + ModuleName.YAML_CONFIG, + "host only support cpu runtime now.") + + data_type = model_config.get(YAMLKeyword.data_type, "") + if runtime == RuntimeType.cpu_gpu and data_type not in GPUDataTypeStrs: + model_config[YAMLKeyword.data_type] = \ + GPUDataType.fp16_fp32.value + elif runtime == RuntimeType.cpu: + if len(data_type) > 0: + mace_check(data_type in CPUDataTypeStrs, + ModuleName.YAML_CONFIG, + "'data_type' must be in " + str(CPUDataTypeStrs) + + " for cpu runtime") + else: + model_config[YAMLKeyword.data_type] = \ + CPUDataType.fp32.value + elif runtime == RuntimeType.gpu: + if len(data_type) > 0: + mace_check(data_type in GPUDataTypeStrs, + ModuleName.YAML_CONFIG, + "'data_type' must be in " + str(GPUDataTypeStrs) + + " for gpu runtime") + else: + model_config[YAMLKeyword.data_type] =\ + GPUDataType.fp16_fp32.value + + subgraphs = model_config.get(YAMLKeyword.subgraphs, "") + mace_check(len(subgraphs) > 0, ModuleName.YAML_CONFIG, + "at least one subgraph is needed") + + for subgraph in subgraphs: + for key in [YAMLKeyword.input_tensors, + YAMLKeyword.input_shapes, + YAMLKeyword.output_tensors, + YAMLKeyword.output_shapes]: + value = subgraph.get(key, "") + mace_check(value != "", ModuleName.YAML_CONFIG, + "'%s' is necessary in subgraph" % key) + if not isinstance(value, list): + subgraph[key] = [value] - data_str = "{model_name},{device_name},{soc},{abi},{device_type}," \ - "{init},{warmup},{run_avg}\n" \ - .format( - model_name=model_name, - device_name=device_name, - soc=target_soc, - abi=abi, - device_type=device_type, - init=metrics[0], - warmup=metrics[1], - run_avg=metrics[2] - ) - with open(report_filename, 'a') as f: - f.write(data_str) + for key in [YAMLKeyword.limit_opencl_kernel_time, + YAMLKeyword.nnlib_graph_mode, + YAMLKeyword.obfuscate, + YAMLKeyword.winograd]: + value = model_config.get(key, "") + if value == "": + model_config[key] = 0 + validation_inputs_data = model_config.get("validation_inputs_data", + []) + model_config["validation_inputs_data"] = validation_inputs_data + if not isinstance(validation_inputs_data, list): + model_config["validation_inputs_data"] = [ + validation_inputs_data] -def tuning_run(target_abi, - serialno, - vlog_level, - embed_model_data, - model_output_dir, - input_nodes, - output_nodes, - input_shapes, - output_shapes, - mace_model_dir, - model_name, - device_type, - running_round, - restart_round, - out_of_range_check, - phone_data_dir, - tuning=False, - limit_opencl_kernel_time=0, - omp_num_threads=-1, - cpu_affinity_policy=1, - gpu_perf_hint=3, - gpu_priority_hint=3, - runtime_failure_ratio=0.0): - stdout = sh_commands.tuning_run( - target_abi, - serialno, - vlog_level, - embed_model_data, - model_output_dir, - input_nodes, - output_nodes, - input_shapes, - output_shapes, - mace_model_dir, - model_name, - device_type, - running_round, - restart_round, - limit_opencl_kernel_time, - tuning, - out_of_range_check, - phone_data_dir, - omp_num_threads, - cpu_affinity_policy, - gpu_perf_hint, - gpu_priority_hint, - runtime_failure_ratio, - valgrind=FLAGS.valgrind, - valgrind_path=FLAGS.valgrind_path, - valgrind_args=FLAGS.valgrind_args - ) + weight_file_path = model_config.get("weight_file_path", "") + model_config["weight_file_path"] = weight_file_path - if running_round > 0 and FLAGS.collect_report: - model_benchmark_stdout_processor( - stdout, target_abi, serialno, model_name, device_type) + return configs -def build_mace_run_prod(hexagon_mode, runtime, target_abi, serialno, - vlog_level, embed_model_data, model_load_type, - model_output_dir, input_nodes, output_nodes, - input_shapes, output_shapes, mace_model_dir, - model_name, device_type, running_round, restart_round, - tuning, limit_opencl_kernel_time, phone_data_dir, - enable_openmp): - mace_run_target = "//mace/tools/validation:mace_run" - strip = "always" - debug = False - if FLAGS.valgrind: - strip = "never" - debug = True - - if not runtime or runtime == "gpu": - gen_opencl_and_tuning_code(target_abi, serialno, [], False) - sh_commands.bazel_build( - mace_run_target, - abi=target_abi, - production_mode=False, - hexagon_mode=hexagon_mode, - enable_openmp=enable_openmp - ) - sh_commands.update_mace_run_lib(model_output_dir, model_load_type, - model_name, embed_model_data) - - device_type = parse_device_type("gpu") - tuning_run(target_abi, serialno, vlog_level, embed_model_data, - model_output_dir, input_nodes, output_nodes, input_shapes, - output_shapes, mace_model_dir, model_name, device_type, - running_round=0, restart_round=1, out_of_range_check=False, - phone_data_dir=phone_data_dir, tuning=tuning, - limit_opencl_kernel_time=limit_opencl_kernel_time) - - gen_opencl_and_tuning_code(target_abi, serialno, [model_output_dir], - True) - sh_commands.bazel_build( - mace_run_target, - strip, - abi=target_abi, - production_mode=True, - hexagon_mode=hexagon_mode, - debug=debug, - enable_openmp=enable_openmp - ) - sh_commands.update_mace_run_lib(model_output_dir, model_load_type, - model_name, embed_model_data) +def get_build_binary_dir(library_name, target_abi, target_soc, + serial_num): + if not target_soc or not serial_num: + binary_path_digest = md5sum(target_abi) else: - gen_opencl_and_tuning_code(target_abi, serialno, [], False) - sh_commands.bazel_build( - mace_run_target, - strip, - abi=target_abi, - production_mode=True, - hexagon_mode=hexagon_mode, - debug=debug, - enable_openmp=enable_openmp - ) - sh_commands.update_mace_run_lib(model_output_dir, model_load_type, - model_name, embed_model_data) - - -def merge_libs_and_tuning_results(target_soc, - target_abi, - serialno, - project_name, - output_dir, - model_output_dirs, - mace_model_dirs_kv, - model_load_type, - hexagon_mode, - embed_model_data): - gen_opencl_and_tuning_code( - target_abi, serialno, model_output_dirs, False) - sh_commands.build_production_code(model_load_type, target_abi) + device_name = sh_commands.adb_get_device_name_by_serialno(serial_num)\ + .replace(' ', '') + binary_path_digest = md5sum(target_abi + target_soc + serial_num) + binary_path_digest = "%s_%s_%s" % \ + (device_name, target_soc, binary_path_digest) + return "%s/%s/%s/%s" % ( + BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, binary_path_digest) + + +def get_build_model_dirs(library_name, model_name, target_abi, target_soc, + serial_num, model_file_path): + model_path_digest = md5sum(model_file_path) + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, + model_name, model_path_digest) + + if target_abi == ABIType.host: + model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) + elif not target_soc or not serial_num: + model_output_dir = "%s/%s/%s" % ( + model_output_base_dir, BUILD_TMP_GENERAL_OUTPUT_DIR_NAME, + target_abi) + else: + device_name = \ + sh_commands.adb_get_device_name_by_serialno(serial_num) + model_output_dir = "%s/%s_%s/%s" % ( + model_output_base_dir, device_name.replace(' ', ''), + target_soc, target_abi) - sh_commands.merge_libs(target_soc, - target_abi, - project_name, - output_dir, - model_output_dirs, - mace_model_dirs_kv, - model_load_type, - hexagon_mode, - embed_model_data) + mace_model_dir = \ + '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) + + return model_output_base_dir, model_output_dir, mace_model_dir + + +################################ +# build +################################ +def pull_opencl_binary_and_tuning_param(target_abi, + serialno, + model_output_dirs): + cl_built_kernel_file_name = "mace_cl_compiled_program.bin" + cl_platform_info_file_name = "mace_cl_platform_info.txt" + sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) + + +def gen_opencl_and_tuning_code(model_output_dirs): + cl_built_kernel_file_name = "mace_cl_compiled_program.bin" + cl_platform_info_file_name = "mace_cl_platform_info.txt" + + # generate opencl binary code + sh_commands.gen_opencl_binary_code(model_output_dirs, + cl_built_kernel_file_name, + cl_platform_info_file_name) + + sh_commands.gen_tuning_param_code(model_output_dirs) + + +def print_configuration(flags, configs): + title = "Common Configuration" + header = ["key", "value"] + data = list() + data.append([YAMLKeyword.library_name, + configs[YAMLKeyword.library_name]]) + data.append([YAMLKeyword.target_abis, + configs[YAMLKeyword.target_abis]]) + data.append([YAMLKeyword.target_socs, + configs[YAMLKeyword.target_socs]]) + data.append([YAMLKeyword.build_type, + configs[YAMLKeyword.build_type]]) + data.append([YAMLKeyword.embed_model_data, + configs[YAMLKeyword.embed_model_data]]) + data.append(["Tuning", flags.tuning]) + MaceLogger.summary(StringFormatter.table(header, data, title)) def download_model_files(model_file_path, model_output_dir, weight_file_path=""): - model_file = "" - weight_file = "" if model_file_path.startswith("http://") or \ model_file_path.startswith("https://"): model_file = model_output_dir + "/model.pb" @@ -313,8 +455,6 @@ def download_model_files(model_file_path, def get_model_files_path(model_file_path, model_output_dir, weight_file_path=""): - model_file = "" - weight_file = "" if model_file_path.startswith("http://") or \ model_file_path.startswith("https://"): model_file = model_output_dir + "/model.pb" @@ -330,10 +470,540 @@ def get_model_files_path(model_file_path, return model_file, weight_file -def md5sum(str): - md5 = hashlib.md5() - md5.update(str) - return md5.hexdigest() +def convert_model(configs): + # Remove previous output dirs + library_name = configs[YAMLKeyword.library_name] + if not os.path.exists(BUILD_OUTPUT_DIR): + os.makedirs(BUILD_OUTPUT_DIR) + elif not os.path.exists(os.path.join(BUILD_OUTPUT_DIR, library_name)): + os.makedirs(os.path.join(BUILD_OUTPUT_DIR, library_name)) + + model_output_dir = \ + '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) + if os.path.exists(model_output_dir): + sh.rm("-rf", model_output_dir) + os.makedirs(model_output_dir) + + embed_model_data = configs[YAMLKeyword.embed_model_data] + + sh_commands.clear_model_codegen() + for model_name in configs[YAMLKeyword.models]: + MaceLogger.header( + StringFormatter.block("Convert %s model" % model_name)) + model_config = configs[YAMLKeyword.models][model_name] + runtime = model_config[YAMLKeyword.runtime] + + # Create model build directory + model_path_digest = md5sum( + model_config[YAMLKeyword.model_file_path]) + + model_output_base_dir = "%s/%s/%s/%s/%s" % ( + BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, + model_name, model_path_digest) + + if os.path.exists(model_output_base_dir): + sh.rm("-rf", model_output_base_dir) + os.makedirs(model_output_base_dir) + + download_model_files( + model_config[YAMLKeyword.model_file_path], + model_output_base_dir, + model_config[YAMLKeyword.weight_file_path]) + + model_file_path, weight_file_path = get_model_files_path( + model_config[YAMLKeyword.model_file_path], + model_output_base_dir, + model_config[YAMLKeyword.weight_file_path]) + + if sha256_checksum(model_file_path) != \ + model_config[YAMLKeyword.model_sha256_checksum]: + MaceLogger.error(ModuleName.MODEL_CONVERTER, + "model file sha256checksum not match") + + if weight_file_path: + if sha256_checksum(weight_file_path) != \ + model_config[YAMLKeyword.weight_sha256_checksum]: + MaceLogger.error(ModuleName.MODEL_CONVERTER, + "weight file sha256checksum not match") + + data_type = model_config[YAMLKeyword.data_type] + if ABIType.host.value in configs[YAMLKeyword.target_abis]: + data_type = CPUDataType.fp32.value + # TODO(liuqi): support multiple subgraphs + subgraphs = model_config[YAMLKeyword.subgraphs] + + model_codegen_dir = "mace/codegen/models/%s" % model_name + sh_commands.gen_model_code( + model_codegen_dir, + model_config[YAMLKeyword.platform], + model_file_path, + weight_file_path, + model_config[YAMLKeyword.model_sha256_checksum], + model_config[YAMLKeyword.weight_sha256_checksum], + ",".join(subgraphs[0][YAMLKeyword.input_tensors]), + ",".join(subgraphs[0][YAMLKeyword.output_tensors]), + runtime, + model_name, + ":".join(subgraphs[0][YAMLKeyword.input_shapes]), + model_config[YAMLKeyword.nnlib_graph_mode], + embed_model_data, + model_config[YAMLKeyword.winograd], + model_config[YAMLKeyword.obfuscate], + configs[YAMLKeyword.build_type], + data_type) + + # mv pb and data file to build/model_name/model + if not embed_model_data: + sh_commands.mv_model_file_to_output_dir( + model_build_type=configs[YAMLKeyword.build_type], + model_codegen_dir=model_codegen_dir, + model_name=model_name, + output_dir=model_output_dir + ) + + MaceLogger.header( + StringFormatter.block("Model %s converted" % model_name)) + + +def build_specific_lib(target_abi, target_soc, serial_num, + configs, tuning, enable_openmp, + address_sanitizer): + mace_run_target = "//mace/tools/validation:mace_run" + library_name = configs[YAMLKeyword.library_name] + build_type = configs[YAMLKeyword.build_type] + embed_model_data = configs[YAMLKeyword.embed_model_data] + hexagon_mode = get_hexagon_mode(configs) + model_output_dirs = [] + + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, + target_soc, serial_num) + if os.path.exists(build_tmp_binary_dir): + sh.rm("-rf", build_tmp_binary_dir) + os.makedirs(build_tmp_binary_dir) + + gen_opencl_and_tuning_code([]) + sh_commands.bazel_build( + mace_run_target, + abi=target_abi, + hexagon_mode=hexagon_mode, + enable_openmp=enable_openmp, + address_sanitizer=address_sanitizer + ) + sh_commands.update_mace_run_lib(build_tmp_binary_dir) + binary_changed = False + + for model_name in configs[YAMLKeyword.models]: + model_config = configs[YAMLKeyword.models][model_name] + model_runtime = model_config[YAMLKeyword.runtime] + # Create model build directory + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, target_abi, + target_soc, serial_num, + model_config[YAMLKeyword.model_file_path]) + + model_output_dirs.append(model_output_dir) + + if os.path.exists(model_output_dir): + sh.rm("-rf", model_output_dir) + os.makedirs(model_output_dir) + + # build for specified soc + if not address_sanitizer and target_abi != ABIType.host \ + and target_soc is not None and \ + model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu]: + sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) + + subgraphs = model_config[YAMLKeyword.subgraphs] + # generate input data + input_file_list = model_config[YAMLKeyword.validation_inputs_data] + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + input_file_list) + + device_type = parse_device_type(RuntimeType.gpu) + sh_commands.tuning_run( + abi=target_abi, + serialno=serial_num, + mace_run_dir=build_tmp_binary_dir, + vlog_level=0, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=device_type, + running_round=0, + restart_round=1, + limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time], # noqa + tuning=tuning, + out_of_range_check=False, + phone_data_dir=PHONE_DATA_DIR, + build_type=build_type + ) + + pull_opencl_binary_and_tuning_param(target_abi, serial_num, + [model_output_dir]) + binary_changed = True + + if binary_changed: + gen_opencl_and_tuning_code(model_output_dirs) + sh_commands.bazel_build( + mace_run_target, + abi=target_abi, + hexagon_mode=hexagon_mode, + enable_openmp=enable_openmp, + address_sanitizer=address_sanitizer + ) + sh_commands.update_mace_run_lib(build_tmp_binary_dir) + + if target_abi == ABIType.host: + sh_commands.build_host_libraries(build_type, target_abi) + + # build benchmark_model binary + sh_commands.build_benchmark_model(target_abi, + build_tmp_binary_dir, + hexagon_mode) + + # generate library + sh_commands.merge_libs(target_soc, + target_abi, + library_name, + BUILD_OUTPUT_DIR, + OUTPUT_LIBRARY_DIR_NAME, + build_type, + hexagon_mode) + + +def generate_library(configs, tuning, enable_openmp, address_sanitizer): + MaceLogger.header(StringFormatter.block("Building library")) + # generate source + MaceLogger.info('* generate common source files...') + sh_commands.gen_mace_version() + sh_commands.gen_encrypted_opencl_source() + sh_commands.gen_mace_engine_factory_source( + configs[YAMLKeyword.models].keys(), + configs[YAMLKeyword.build_type]) + MaceLogger.info('generate common source files done') + + # create build dirs + library_name = configs[YAMLKeyword.library_name] + if not os.path.exists(BUILD_OUTPUT_DIR): + os.makedirs(BUILD_OUTPUT_DIR) + tmp_build_dir = os.path.join(BUILD_OUTPUT_DIR, library_name, + BUILD_TMP_DIR_NAME) + if not os.path.exists(tmp_build_dir): + os.makedirs(tmp_build_dir) + library_out_dir = os.path.join(BUILD_OUTPUT_DIR, library_name, + OUTPUT_LIBRARY_DIR_NAME) + if os.path.exists(library_out_dir): + sh.rm('-rf', library_out_dir) + + target_socs = configs[YAMLKeyword.target_socs] + for target_abi in configs[YAMLKeyword.target_abis]: + if not target_socs or target_abi == ABIType.host.value: + build_specific_lib(target_abi, None, None, configs, + tuning, enable_openmp, address_sanitizer) + else: + for target_soc in target_socs: + serial_num = sh_commands.get_target_soc_serial_number( + target_soc) + with sh_commands.device_lock(serial_num): + build_specific_lib(target_abi, target_soc, serial_num, + configs, tuning, enable_openmp, + address_sanitizer) + + # package library + sh_commands.packaging_lib(BUILD_OUTPUT_DIR, + configs[YAMLKeyword.library_name]) + + +def print_library_summary(configs): + library_name = configs[YAMLKeyword.library_name] + title = "Library" + header = ["key", "value"] + data = list() + data.append(["library package", + "%s/%s/libmace_%s.tar.gz" + % (BUILD_OUTPUT_DIR, library_name, library_name)]) + MaceLogger.summary(StringFormatter.table(header, data, title)) + + +def build_library(flags): + configs = format_model_config(flags.config) + + print_configuration(flags, configs) + + convert_model(configs) + + generate_library(configs, flags.tuning, + flags.enable_openmp, flags.address_sanitizer) + + print_library_summary(configs) + + +################################ +# run +################################ +def report_run_statistics(stdout, + abi, + serialno, + model_name, + device_type, + output_dir): + metrics = [0] * 3 + for line in stdout.split('\n'): + line = line.strip() + parts = line.split() + if len(parts) == 4 and parts[0].startswith("time"): + metrics[0] = str(float(parts[1])) + metrics[1] = str(float(parts[2])) + metrics[2] = str(float(parts[3])) + break + + device_name = "" + target_soc = "" + if abi != "host": + props = sh_commands.adb_getprop_by_serialno(serialno) + device_name = props.get("ro.product.model", "") + target_soc = props.get("ro.board.platform", "") + + report_filename = output_dir + "/report.csv" + if not os.path.exists(report_filename): + with open(report_filename, 'w') as f: + f.write("model_name,device_name,soc,abi,runtime," + "init,warmup,run_avg\n") + + data_str = "{model_name},{device_name},{soc},{abi},{device_type}," \ + "{init},{warmup},{run_avg}\n" \ + .format(model_name=model_name, + device_name=device_name, + soc=target_soc, + abi=abi, + device_type=device_type, + init=metrics[0], + warmup=metrics[1], + run_avg=metrics[2] + ) + with open(report_filename, 'a') as f: + f.write(data_str) + + +def run_specific_target(flags, configs, target_abi, + target_soc, serial_num): + library_name = configs[YAMLKeyword.library_name] + build_type = configs[YAMLKeyword.build_type] + embed_model_data = configs[YAMLKeyword.embed_model_data] + if not configs[YAMLKeyword.target_socs]: + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, + None, None) + else: + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, + target_soc, serial_num) + + for model_name in configs[YAMLKeyword.models]: + model_config = configs[YAMLKeyword.models][model_name] + model_runtime = model_config[YAMLKeyword.runtime] + subgraphs = model_config[YAMLKeyword.subgraphs] + + if not configs[YAMLKeyword.target_socs]: + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, target_abi, + None, None, + model_config[YAMLKeyword.model_file_path]) + else: + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, target_abi, + target_soc, serial_num, + model_config[YAMLKeyword.model_file_path]) + if target_abi != ABIType.host: + sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) + + # generate input data + input_file_list = model_config[YAMLKeyword.validation_inputs_data] + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + input_file_list) + runtime_list = [] + if target_abi == ABIType.host: + runtime_list.extend([RuntimeType.cpu]) + elif model_runtime == RuntimeType.cpu_gpu: + runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) + else: + runtime_list.extend([model_runtime]) + for runtime in runtime_list: + device_type = parse_device_type(runtime) + run_output = sh_commands.tuning_run( + abi=target_abi, + serialno=serial_num, + mace_run_dir=build_tmp_binary_dir, + vlog_level=flags.vlog_level, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=device_type, + running_round=flags.round, + restart_round=flags.restart_round, + limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time], # noqa + tuning=False, + out_of_range_check=flags.check_gpu_out_of_memory, + phone_data_dir=PHONE_DATA_DIR, + build_type=build_type, + omp_num_threads=flags.omp_num_threads, + cpu_affinity_policy=flags.cpu_affinity_policy, + gpu_perf_hint=flags.gpu_perf_hint, + gpu_priority_hint=flags.gpu_priority_hint, + runtime_failure_ratio=flags.runtime_failure_ratio, + address_sanitizer=flags.address_sanitizer, + ) + if flags.validate: + model_file_path, weight_file_path = get_model_files_path( + model_config["model_file_path"], + model_output_base_dir, + model_config["weight_file_path"]) + + sh_commands.validate_model( + abi=target_abi, + serialno=serial_num, + model_file_path=model_file_path, + weight_file_path=weight_file_path, + platform=model_config[YAMLKeyword.platform], + device_type=device_type, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + model_output_dir=model_output_dir, + phone_data_dir=PHONE_DATA_DIR, + caffe_env=flags.caffe_env) + if flags.report and flags.round > 0: + report_run_statistics( + run_output, target_abi, serial_num, + model_name, device_type, flags.report_dir) + + +def run_mace(flags): + configs = format_model_config(flags.config) + + target_socs = configs[YAMLKeyword.target_socs] + if not target_socs: + target_socs = sh_commands.adb_get_all_socs() + if ABIType.host not in configs[YAMLKeyword.target_abis] \ + and not target_socs: + MaceLogger.warning('There is no device plugin the computer.') + + for target_abi in configs[YAMLKeyword.target_abis]: + if target_abi == ABIType.host: + run_specific_target(flags, configs, target_abi, None, None) + else: + for target_soc in target_socs: + serial_num = sh_commands.get_target_soc_serial_number( + target_soc) + with sh_commands.device_lock(serial_num): + run_specific_target(flags, configs, target_abi, + target_soc, serial_num) + + +################################ +# benchmark model +################################ +def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): + library_name = configs[YAMLKeyword.library_name] + build_type = configs[YAMLKeyword.build_type] + embed_model_data = configs[YAMLKeyword.embed_model_data] + if not configs[YAMLKeyword.target_socs]: + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, + None, None) + else: + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, + target_soc, serial_num) + + for model_name in configs[YAMLKeyword.models]: + model_config = configs[YAMLKeyword.models][model_name] + model_runtime = model_config[YAMLKeyword.runtime] + subgraphs = model_config[YAMLKeyword.subgraphs] + + if not configs[YAMLKeyword.target_socs]: + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, target_abi, + None, None, + model_config[YAMLKeyword.model_file_path]) + else: + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, target_abi, + target_soc, serial_num, + model_config[YAMLKeyword.model_file_path]) + if target_abi != ABIType.host: + sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) + + input_file_list = model_config[YAMLKeyword.validation_inputs_data] + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + input_file_list) + runtime_list = [] + if target_abi == ABIType.host: + runtime_list.extend([RuntimeType.cpu]) + elif model_runtime == RuntimeType.cpu_gpu: + runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) + else: + runtime_list.extend([model_runtime]) + for runtime in runtime_list: + device_type = parse_device_type(runtime) + sh_commands.benchmark_model( + abi=target_abi, + serialno=serial_num, + benchmark_binary_dir=build_tmp_binary_dir, + vlog_level=0, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=device_type, + phone_data_dir=PHONE_DATA_DIR, + build_type=build_type, + omp_num_threads=flags.omp_num_threads, + cpu_affinity_policy=flags.cpu_affinity_policy, + gpu_perf_hint=flags.gpu_perf_hint, + gpu_priority_hint=flags.gpu_priority_hint) + + +def benchmark_model(flags): + configs = format_model_config(flags.config) + + target_socs = configs[YAMLKeyword.target_socs] + if not target_socs: + target_socs = sh_commands.adb_get_all_socs() + if ABIType.host.value not in configs[YAMLKeyword.target_abis] \ + and not target_socs: + MaceLogger.warning('There is no device plugin the computer.') + + for target_abi in configs[YAMLKeyword.target_abis]: + if target_abi == ABIType.host.value: + bm_specific_target(flags, configs, target_abi, None, None) + else: + for target_soc in target_socs: + serial_num = sh_commands.get_target_soc_serial_number( + target_soc) + with sh_commands.device_lock(serial_num): + bm_specific_target(flags, configs, target_abi, + target_soc, serial_num) ################################ @@ -350,515 +1020,121 @@ def str2bool(v): def str_to_caffe_env_type(v): if v.lower() == 'docker': - return common.CaffeEnvType.DOCKER + return CaffeEnvType.DOCKER elif v.lower() == 'local': - return common.CaffeEnvType.LOCAL + return CaffeEnvType.LOCAL else: raise argparse.ArgumentTypeError('[docker | local] expected.') -def parse_model_configs(): - print("============== Load and Parse configs ==============") - with open(FLAGS.config) as f: - configs = yaml.load(f) - target_abis = configs.get("target_abis", []) - if not isinstance(target_abis, list) or not target_abis: - print("CONFIG ERROR:") - print("target_abis list is needed!") - print("For example: 'target_abis: [armeabi-v7a, arm64-v8a]'") - exit(1) - - embed_model_data = configs.get("embed_model_data", "") - if embed_model_data == "" or not isinstance(embed_model_data, int) or \ - embed_model_data < 0 or embed_model_data > 1: - print("CONFIG ERROR:") - print("embed_model_data must be integer in range [0, 1]") - exit(1) - elif FLAGS.model_load_type == "pb": - configs["embed_model_data"] = 0 - print("emebed_model_data is set 0") - - model_names = configs.get("models", "") - if not model_names: - print("CONFIG ERROR:") - print("models attribute not found in config file") - exit(1) - - for model_name in model_names: - model_config = configs["models"][model_name] - platform = model_config.get("platform", "") - if platform == "" or platform not in ["tensorflow", "caffe"]: - print("CONFIG ERROR:") - print("'platform' must be 'tensorflow' or 'caffe'") - exit(1) - - for key in ["model_file_path", "model_sha256_checksum"]: - value = model_config.get(key, "") - if value == "": - print("CONFIG ERROR:") - print("'%s' is necessary" % key) - exit(1) - - for key in ["input_nodes", "input_shapes", "output_nodes", - "output_shapes"]: - value = model_config.get(key, "") - if value == "": - print("CONFIG ERROR:") - print("'%s' is necessary" % key) - exit(1) - if not isinstance(value, list): - model_config[key] = [value] - - for key in ["limit_opencl_kernel_time", "dsp_mode", "obfuscate", - "fast_conv"]: - value = model_config.get(key, "") - if value == "": - model_config[key] = 0 - print("'%s' for %s is set to default value: 0" % - (key, model_name)) - - validation_inputs_data = model_config.get("validation_inputs_data", - []) - model_config["validation_inputs_data"] = validation_inputs_data - if not isinstance(validation_inputs_data, list): - model_config["validation_inputs_data"] = [ - validation_inputs_data] - - weight_file_path = model_config.get("weight_file_path", "") - model_config["weight_file_path"] = weight_file_path - - print("Parse model configs successfully!\n") - return configs - - def parse_args(): """Parses command line arguments.""" - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", + all_type_parent_parser = argparse.ArgumentParser(add_help=False) + all_type_parent_parser.add_argument( + '--config', type=str, - default="./tool/config", + default="", required=True, - help="The global config file of models.") - parser.add_argument( - "--output_dir", type=str, default="build", help="The output dir.") - parser.add_argument( - "--round", type=int, default=1, help="The model running round.") - parser.add_argument( - "--run_seconds", - type=int, - default=10, - help="The model throughput test running seconds.") - parser.add_argument( - "--restart_round", - type=int, - default=1, - help="The model restart round.") - parser.add_argument( - "--tuning", - type=str2bool, - default=True, - help="Tune opencl params.") - parser.add_argument( - "--mode", - type=str, - default="all", - help="[build|run|validate|benchmark|merge|all|throughput_test].") - parser.add_argument( - "--target_socs", - type=str, - default="all", - help="SoCs to build, comma seperated list (getprop ro.board.platform)") - parser.add_argument( - "--out_of_range_check", - type=str2bool, - default=False, - help="Enable out of range check for opencl.") - parser.add_argument( - "--enable_openmp", - type=str2bool, - default=True, - help="Enable openmp.") - parser.add_argument( + help="model yaml configuration file path") + build_run_parent_parser = argparse.ArgumentParser(add_help=False) + build_run_parent_parser.add_argument( + '--address_sanitizer', + action="store_true", + help="Whether to use valgrind to check memory error") + run_bm_parent_parser = argparse.ArgumentParser(add_help=False) + run_bm_parent_parser.add_argument( "--omp_num_threads", type=int, - default=-1, + default=DefaultValues.omp_num_threads, help="num of openmp threads") - parser.add_argument( + run_bm_parent_parser.add_argument( "--cpu_affinity_policy", type=int, - default=1, + default=DefaultValues.cpu_affinity_policy, help="0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY") - parser.add_argument( + run_bm_parent_parser.add_argument( "--gpu_perf_hint", type=int, - default=3, + default=DefaultValues.gpu_perf_hint, help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") - parser.add_argument( + run_bm_parent_parser.add_argument( "--gpu_priority_hint", type=int, - default=3, + default=DefaultValues.gpu_priority_hint, help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") - parser.add_argument( - "--collect_report", - type=str2bool, - default=False, - help="Collect report.") - parser.add_argument( - "--vlog_level", + + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + build = subparsers.add_parser( + 'build', + parents=[all_type_parent_parser, build_run_parent_parser], + help='build model library and test tools') + build.set_defaults(func=build_library) + build.add_argument( + '--tuning', + action="store_true", + help="tuning gpu parameters for specified SOC") + build.add_argument( + "--enable_openmp", + action="store_false", + help="Enable openmp for multiple thread.") + run = subparsers.add_parser( + 'run', + parents=[all_type_parent_parser, run_bm_parent_parser, + build_run_parent_parser], + help='run model in command line') + run.set_defaults(func=run_mace) + run.add_argument( + "--round", type=int, - default=0, - help="VLOG level.") - parser.add_argument( + default=1, + help="The model running round.") + run.add_argument( + "--validate", + action="store_true", + help="validate result by comparing mace output and platform's output.") + run.add_argument( "--caffe_env", type=str_to_caffe_env_type, default='docker', help="[docker | local] caffe environment.") - parser.add_argument( - "--valgrind", - type=bool, - default=False, - help="Whether to use valgrind to check memory error.") - parser.add_argument( - "--valgrind_path", - type=str, - default="/data/local/tmp/valgrind", - help="Valgrind install path.") - parser.add_argument( - "--valgrind_args", + run.add_argument( + "--vlog_level", + type=int, + default=0, + help="VLOG level: [1~5].") + run.add_argument( + "--check_gpu_out_of_memory", + action="store_true", + help="Enable out of memory check for gpu.") + run.add_argument( + "--restart_round", + type=int, + default=1, + help="restart round for run.") + run.add_argument( + "--report", + action="store_true", + help="print run statistics report.") + run.add_argument( + "--report_dir", type=str, default="", - help="Valgrind command args.") - parser.add_argument( - "--validation_runtime", - type=str, - default="cpu", - help="validation runtime.") - parser.add_argument( - "--model_load_type", - type=str, - default="source", - help="[source|pb] Load models in generated `source` code" + - "or `pb` file.") - parser.add_argument( - "--gpu_data_type", - type=str, - default="half", - help="[half | float].") - parser.add_argument( + help="print run statistics report.") + run.add_argument( "--runtime_failure_ratio", type=float, default=0.0, help="[mock runtime failure ratio].") + benchmark = subparsers.add_parser( + 'benchmark', + parents=[all_type_parent_parser, run_bm_parent_parser, + build_run_parent_parser], + help='benchmark model for detail information') + benchmark.set_defaults(func=benchmark_model) return parser.parse_known_args() -def process_models(project_name, configs, embed_model_data, vlog_level, - target_abi, phone_data_dir, model_load_type, - target_soc="", serialno=""): - hexagon_mode = get_hexagon_mode(configs) - model_output_dirs = [] - mace_model_dirs_kv = {} - - for model_name in configs["models"]: - print '===================', model_name, '===================' - model_config = configs["models"][model_name] - input_file_list = model_config["validation_inputs_data"] - model_runtime = model_config.get("runtime", "") - model_device_type = parse_device_type(model_runtime) - run_device_type = model_device_type - if not run_device_type: - run_device_type = parse_device_type(FLAGS.validation_runtime) - # Create model build directory - model_path_digest = md5sum(model_config["model_file_path"]) - model_output_base_dir = "%s/%s/%s/%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest) - if model_load_type == "pb": - mace_model_dir = model_output_base_dir - mace_model_dirs_kv[model_name] = mace_model_dir - else: - mace_model_dir = "" - - if target_abi == "host": - model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) - else: - device_name = sh_commands.adb_get_device_name_by_serialno(serialno) - model_output_dir = "%s/%s_%s/%s" % ( - model_output_base_dir, device_name.replace(' ', ''), - target_soc, target_abi) - sh_commands.clear_phone_data_dir(serialno, phone_data_dir) - - model_output_dirs.append(model_output_dir) - - if FLAGS.mode == "build" or FLAGS.mode == "all": - if os.path.exists(model_output_dir): - sh.rm("-rf", model_output_dir) - os.makedirs(model_output_dir) - - model_file_path, weight_file_path = get_model_files_path( - model_config["model_file_path"], - model_output_base_dir, - model_config["weight_file_path"]) - - if FLAGS.mode == "build" or FLAGS.mode == "run" or \ - FLAGS.mode == "validate" or \ - FLAGS.mode == "benchmark" or FLAGS.mode == "all": - sh_commands.gen_random_input(model_output_dir, - model_config["input_nodes"], - model_config["input_shapes"], - input_file_list) - - if FLAGS.mode == "build" or FLAGS.mode == "all": - build_mace_run_prod(hexagon_mode, - model_runtime, - target_abi, - serialno, - vlog_level, - embed_model_data, - model_load_type, - model_output_dir, - model_config["input_nodes"], - model_config["output_nodes"], - model_config["input_shapes"], - model_config["output_shapes"], - mace_model_dir, - model_name, - model_device_type, - FLAGS.round, - FLAGS.restart_round, - FLAGS.tuning, - model_config["limit_opencl_kernel_time"], - phone_data_dir, - FLAGS.enable_openmp) - sh_commands.build_benchmark_model(target_abi, - embed_model_data, - model_output_dir, - model_name, - hexagon_mode) - - if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ - FLAGS.mode == "all": - if FLAGS.mode == "run": - runtime_failure_ratio = FLAGS.runtime_failure_ratio - else: - runtime_failure_ratio = 0.0 - - tuning_run(target_abi, - serialno, - vlog_level, - embed_model_data, - model_output_dir, - model_config["input_nodes"], - model_config["output_nodes"], - model_config["input_shapes"], - model_config["output_shapes"], - mace_model_dir, - model_name, - run_device_type, - FLAGS.round, - FLAGS.restart_round, - FLAGS.out_of_range_check, - phone_data_dir, - omp_num_threads=FLAGS.omp_num_threads, - cpu_affinity_policy=FLAGS.cpu_affinity_policy, - gpu_perf_hint=FLAGS.gpu_perf_hint, - gpu_priority_hint=FLAGS.gpu_priority_hint, - runtime_failure_ratio=runtime_failure_ratio) - - if FLAGS.mode == "benchmark": - gen_opencl_and_tuning_code( - target_abi, serialno, [model_output_dir], False) - sh_commands.benchmark_model(target_abi, - serialno, - vlog_level, - embed_model_data, - model_output_dir, - mace_model_dir, - model_config["input_nodes"], - model_config["output_nodes"], - model_config["input_shapes"], - model_config["output_shapes"], - model_name, - run_device_type, - phone_data_dir, - FLAGS.omp_num_threads, - FLAGS.cpu_affinity_policy, - FLAGS.gpu_perf_hint, - FLAGS.gpu_priority_hint) - - if FLAGS.mode == "validate" or FLAGS.mode == "all": - sh_commands.validate_model(target_abi, - serialno, - model_file_path, - weight_file_path, - model_config["platform"], - run_device_type, - model_config["input_nodes"], - model_config["output_nodes"], - model_config["input_shapes"], - model_config["output_shapes"], - model_output_dir, - phone_data_dir, - FLAGS.caffe_env) - - if FLAGS.mode == "build" or FLAGS.mode == "merge" or \ - FLAGS.mode == "all": - merge_libs_and_tuning_results( - target_soc, - target_abi, - serialno, - project_name, - FLAGS.output_dir, - model_output_dirs, - mace_model_dirs_kv, - model_load_type, - hexagon_mode, - embed_model_data) - - if FLAGS.mode == "throughput_test": - merged_lib_file = FLAGS.output_dir + \ - "/%s/%s/libmace_%s.%s.a" % \ - (project_name, target_abi, project_name, target_soc) - first_model = configs["models"].values()[0] - throughput_test_output_dir = "%s/%s/%s/%s" % ( - FLAGS.output_dir, project_name, "build", - "throughput_test") - if os.path.exists(throughput_test_output_dir): - sh.rm("-rf", throughput_test_output_dir) - os.makedirs(throughput_test_output_dir) - input_file_list = model_config["validation_inputs_data"] - sh_commands.gen_random_input(throughput_test_output_dir, - first_model["input_nodes"], - first_model["input_shapes"], - input_file_list) - model_tag_dict = {} - for model_name in configs["models"]: - runtime = configs["models"][model_name]["runtime"] - model_tag_dict[runtime] = model_name - sh_commands.build_run_throughput_test(target_abi, - serialno, - vlog_level, - FLAGS.run_seconds, - merged_lib_file, - throughput_test_output_dir, - embed_model_data, - model_config["input_nodes"], - model_config["output_nodes"], - model_config["input_shapes"], - model_config["output_shapes"], - model_tag_dict.get("cpu", ""), - model_tag_dict.get("gpu", ""), - model_tag_dict.get("dsp", ""), - phone_data_dir) - - -def main(unused_args): - common.init_logging() - configs = parse_model_configs() - - if FLAGS.mode == "validate": - FLAGS.round = 1 - FLAGS.restart_round = 1 - - project_name = os.path.splitext(os.path.basename(FLAGS.config))[0] - if FLAGS.mode == "build" or FLAGS.mode == "all": - # Remove previous output dirs - if not os.path.exists(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")): - sh.rm("-rf", os.path.join(FLAGS.output_dir, project_name)) - os.makedirs(os.path.join(FLAGS.output_dir, project_name)) - - # generate source - sh_commands.gen_mace_version() - sh_commands.gen_encrypted_opencl_source() - sh_commands.gen_mace_engine_factory_source(configs['models'].keys(), - FLAGS.model_load_type) - - embed_model_data = configs["embed_model_data"] - target_socs = get_target_socs(configs) - - vlog_level = FLAGS.vlog_level - phone_data_dir = "/data/local/tmp/mace_run/" - - if FLAGS.mode == "build" or FLAGS.mode == "all": - print '* Model Convert' - sh_commands.clear_model_codegen() - for model_name in configs["models"]: - print '===================', model_name, '===================' - model_config = configs["models"][model_name] - runtime = model_config.get("runtime", "") - - # Create model build directory - model_path_digest = md5sum(model_config["model_file_path"]) - - model_output_base_dir = "%s/%s/%s/%s/%s" % ( - FLAGS.output_dir, project_name, "build", - model_name, model_path_digest) - - if os.path.exists(model_output_base_dir): - sh.rm("-rf", model_output_base_dir) - os.makedirs(model_output_base_dir) - - download_model_files( - model_config["model_file_path"], - model_output_base_dir, - model_config["weight_file_path"]) - - model_file_path, weight_file_path = get_model_files_path( - model_config["model_file_path"], - model_output_base_dir, - model_config["weight_file_path"]) - - sh_commands.gen_model_code( - "mace/codegen/models/%s" % model_name, - model_config["platform"], - model_file_path, - weight_file_path, - model_config["model_sha256_checksum"], - ",".join(model_config["input_nodes"]), - ",".join(model_config["output_nodes"]), - runtime, - model_name, - ":".join(model_config["input_shapes"]), - model_config["dsp_mode"], - embed_model_data, - model_config["fast_conv"], - model_config["obfuscate"], - model_output_base_dir, - FLAGS.model_load_type, - FLAGS.gpu_data_type) - - for target_abi in configs["target_abis"]: - for target_soc in target_socs: - if target_abi != 'host': - serialnos = sh_commands.get_target_socs_serialnos([target_soc]) - for serialno in serialnos: - props = sh_commands.adb_getprop_by_serialno(serialno) - print( - "====================================================" - ) - print("Trying to lock device %s" % serialno) - with sh_commands.device_lock(serialno): - print("Run on device: %s, %s, %s" % ( - serialno, props["ro.board.platform"], - props["ro.product.model"])) - process_models(project_name, configs, embed_model_data, - vlog_level, target_abi, phone_data_dir, - FLAGS.model_load_type, target_soc, - serialno) - else: - print("====================================================") - print("Run on host") - process_models(project_name, configs, embed_model_data, - vlog_level, target_abi, phone_data_dir, - FLAGS.model_load_type) - - if FLAGS.mode == "build" or FLAGS.mode == "all": - sh_commands.packaging_lib(FLAGS.output_dir, project_name) - - if __name__ == "__main__": - FLAGS, unparsed = parse_args() - main(unused_args=[sys.argv[0]] + unparsed) + flags, unparsed = parse_args() + flags.func(flags) diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 0c61c9d7..81f69398 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -23,6 +23,7 @@ import subprocess import sys import time import urllib +from enum import Enum import common @@ -48,6 +49,12 @@ def strip_invalid_utf8(str): return sh.iconv(str, "-c", "-t", "UTF-8") +def split_stdout(stdout_str): + stdout_str = strip_invalid_utf8(stdout_str) + # Filter out last empty line + return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0] + + def make_output_processor(buff): def process_output(line): print(line.rstrip()) @@ -72,6 +79,11 @@ def is_device_locked(serialno): return True +class BuildType(object): + proto = 'proto' + code = 'code' + + ################################ # clear data ################################ @@ -90,16 +102,10 @@ def clear_model_codegen(model_codegen_dir="mace/codegen/models"): ################################ # adb commands ################################ -def adb_split_stdout(stdout_str): - stdout_str = strip_invalid_utf8(stdout_str) - # Filter out last empty line - return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0] - - def adb_devices(): serialnos = [] p = re.compile(r'(\w+)\s+device') - for line in adb_split_stdout(sh.adb("devices")): + for line in split_stdout(sh.adb("devices")): m = p.match(line) if m: serialnos.append(m.group(1)) @@ -128,9 +134,26 @@ def get_target_socs_serialnos(target_socs=None): return serialnos +def get_soc_serial_number_map(): + serial_numbers = adb_devices() + soc_serial_number_map = {} + for num in serial_numbers: + props = adb_getprop_by_serialno(num) + soc_serial_number_map[props["ro.board.platform"]] = num + return soc_serial_number_map + + +def get_target_soc_serial_number(target_soc): + soc_serial_number_map = get_soc_serial_number_map() + serial_number = None + if target_soc in soc_serial_number_map: + serial_number = soc_serial_number_map[target_soc] + return serial_number + + def adb_getprop_by_serialno(serialno): outputs = sh.adb("-s", serialno, "shell", "getprop") - raw_props = adb_split_stdout(outputs) + raw_props = split_stdout(outputs) props = {} p = re.compile(r'\[(.+)\]: \[(.+)\]') for raw_prop in raw_props: @@ -173,14 +196,16 @@ def adb_pull(src_path, dst_path, serialno): print("Error msg: %s" % e.stderr) -def adb_run(serialno, +def adb_run(abi, + serialno, host_bin_path, bin_name, args="", opencl_profiling=1, vlog_level=0, device_bin_path="/data/local/tmp/mace", - out_of_range_check=1): + out_of_range_check=1, + address_sanitizer=False): host_bin_full_path = "%s/%s" % (host_bin_path, bin_name) device_bin_full_path = "%s/%s" % (device_bin_path, bin_name) props = adb_getprop_by_serialno(serialno) @@ -195,17 +220,24 @@ def adb_run(serialno, sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path) sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path) adb_push(host_bin_full_path, device_bin_full_path, serialno) + ld_preload = "" + if address_sanitizer: + adb_push(find_asan_rt_library(abi), device_bin_path, serialno) + ld_preload = "LD_PRELOAD=%s/%s" % (device_bin_path, + asan_rt_library_names(abi)), print("Run %s" % device_bin_full_path) + stdout_buff = [] process_output = make_output_processor(stdout_buff) p = sh.adb( "-s", serialno, "shell", - "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d " - "MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" % - (out_of_range_check, opencl_profiling, vlog_level, - device_bin_full_path, args), + ld_preload, + "MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check, + "MACE_OPENCL_PROFILING=%d" % opencl_profiling, + "MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level, + "%s %s" % (device_bin_full_path, args), _out=process_output, _bg=True, _err_to_out=True) @@ -213,128 +245,77 @@ def adb_run(serialno, return "".join(stdout_buff) -def adb_run_valgrind(serialno, - host_bin_path, - bin_name, - valgrind_path="/data/local/tmp/valgrind", - valgrind_args="", - args="", - opencl_profiling=1, - vlog_level=0, - device_bin_path="/data/local/tmp/mace", - out_of_range_check=1): - valgrind_lib = valgrind_path + "/lib/valgrind" - valgrind_bin = valgrind_path + "/bin/valgrind" - host_bin_full_path = "%s/%s" % (host_bin_path, bin_name) - device_bin_full_path = "%s/%s" % (device_bin_path, bin_name) - props = adb_getprop_by_serialno(serialno) - print( - "=====================================================================" - ) - print("Trying to lock device %s" % serialno) - with device_lock(serialno): - print("Run on device: %s, %s, %s" % - (serialno, props["ro.board.platform"], - props["ro.product.model"])) - result = sh.adb("-s", serialno, "shell", "ls %s" % valgrind_path) - if result.startswith("ls:"): - print("Please install valgrind to %s manually." % valgrind_path) - return result - sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path) - sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path) - adb_push(host_bin_full_path, device_bin_full_path, serialno) - print("Run %s" % device_bin_full_path) - stdout_buff = [] - process_output = make_output_processor(stdout_buff) - p = sh.adb( - "-s", - serialno, - "shell", - "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d " - "MACE_CPP_MIN_VLOG_LEVEL=%d VALGRIND_LIB=%s %s %s %s %s " % - (out_of_range_check, opencl_profiling, vlog_level, - valgrind_lib, valgrind_bin, valgrind_args, - device_bin_full_path, args), - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() - return "".join(stdout_buff) +################################ +# Toolchain +################################ +def asan_rt_library_names(abi): + asan_rt_names = { + "armeabi-v7a": "libclang_rt.asan-arm-android.so", + "arm64-v8a": "libclang_rt.asan-aarch64-android.so", + } + return asan_rt_names[abi] + + +def find_asan_rt_library(abi, asan_rt_path=''): + if not asan_rt_path: + find_path = os.environ['ANDROID_NDK_HOME'] + candidates = split_stdout(sh.find(find_path, "-name", + asan_rt_library_names(abi))) + if len(candidates) == 0: + common.MaceLogger.error( + "Toolchain", + "Can't find AddressSanitizer runtime library in % s" % + find_path) + elif len(candidates) > 1: + common.MaceLogger.info( + "More than one AddressSanitizer runtime library, use the 1st") + return candidates[0] + return "%s/%s" % (asan_rt_path, asan_rt_library_names(abi)) ################################ # bazel commands ################################ def bazel_build(target, - strip="always", abi="armeabi-v7a", - production_mode=False, hexagon_mode=False, - disable_no_tuning_warning=False, - debug=False, enable_openmp=True, - enable_neon=True): + enable_neon=True, + address_sanitizer=False): print("* Build %s with ABI %s" % (target, abi)) stdout_buff = [] process_output = make_output_processor(stdout_buff) if abi == "host": bazel_args = ( "build", - "-c", - "opt", - "--strip", - strip, - "--verbose_failures", - target, - "--copt=-std=c++11", - "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", - "--copt=-O3", "--define", "openmp=%s" % str(enable_openmp).lower(), - "--define", - "production=%s" % str(production_mode).lower(), + target, ) - p = sh.bazel( - *bazel_args, - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() else: bazel_args = ( "build", - "-c", - "opt", - "--strip", - strip, - "--verbose_failures", target, - "--crosstool_top=//external:android/crosstool", - "--host_crosstool_top=@bazel_tools//tools/cpp:toolchain", + "--config", + "android", "--cpu=%s" % abi, - "--copt=-std=c++11", - "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", - "--copt=-DMACE_OBFUSCATE_LITERALS", - "--copt=-O3", "--define", "neon=%s" % str(enable_neon).lower(), "--define", "openmp=%s" % str(enable_openmp).lower(), "--define", - "production=%s" % str(production_mode).lower(), - "--define", "hexagon=%s" % str(hexagon_mode).lower()) - if disable_no_tuning_warning: - bazel_args += ("--copt=-DMACE_DISABLE_NO_TUNING_WARNING",) - if debug: - bazel_args += ("--copt=-g",) - p = sh.bazel( - _out=process_output, - _bg=True, - _err_to_out=True, - *bazel_args) - p.wait() - print("Building done!\n") + if address_sanitizer: + bazel_args += ("--config", "asan") + else: + bazel_args += ("--config", "optimization") + p = sh.bazel( + _out=process_output, + _bg=True, + _err_to_out=True, + *bazel_args) + p.wait() + print("Build done!\n") return "".join(stdout_buff) @@ -461,6 +442,7 @@ def gen_model_code(model_codegen_dir, model_file_path, weight_file_path, model_sha256_checksum, + weight_sha256_checksum, input_nodes, output_nodes, runtime, @@ -470,10 +452,8 @@ def gen_model_code(model_codegen_dir, embed_model_data, fast_conv, obfuscate, - model_output_dir, - model_load_type, - gpu_data_type): - print("* Genearte model code") + model_build_type, + data_type): bazel_build_common("//mace/python/tools:converter") if os.path.exists(model_codegen_dir): @@ -488,6 +468,7 @@ def gen_model_code(model_codegen_dir, "--model_file=%s" % model_file_path, "--weight_file=%s" % weight_file_path, "--model_checksum=%s" % model_sha256_checksum, + "--weight_checksum=%s" % weight_sha256_checksum, "--input_node=%s" % input_nodes, "--output_node=%s" % output_nodes, "--runtime=%s" % runtime, @@ -498,15 +479,13 @@ def gen_model_code(model_codegen_dir, "--embed_model_data=%s" % embed_model_data, "--winograd=%s" % fast_conv, "--obfuscate=%s" % obfuscate, - "--codegen_output=%s/model.cc" % model_codegen_dir, - "--pb_output=%s/%s.pb" % (model_output_dir, model_tag), - "--model_load_type=%s" % model_load_type, - "--gpu_data_type=%s" % gpu_data_type, + "--output_dir=%s" % model_codegen_dir, + "--model_build_type=%s" % model_build_type, + "--data_type=%s" % data_type, _out=process_output, _bg=True, _err_to_out=True) p.wait() - print("Model code gen done!\n") def gen_random_input(model_output_dir, @@ -551,22 +530,25 @@ def gen_random_input(model_output_dir, sh.cp("-f", input_file_list[i], dst_input_file) -def update_mace_run_lib(model_output_dir, - model_load_type, - model_tag, - embed_model_data): +def update_mace_run_lib(model_output_dir): mace_run_filepath = model_output_dir + "/mace_run" if os.path.exists(mace_run_filepath): sh.rm("-rf", mace_run_filepath) sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run", model_output_dir) - if embed_model_data == 0: - sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), - model_output_dir) - if model_load_type == "source": - sh.cp("-f", "mace/codegen/models/%s/%s.h" % (model_tag, model_tag), - model_output_dir) +def mv_model_file_to_output_dir( + model_build_type, + model_codegen_dir, + model_name, + output_dir): + if model_build_type == BuildType.proto: + sh.mv("-f", + '%s/%s.pb' % (model_codegen_dir, model_name), + output_dir) + sh.mv("-f", + '%s/%s.data' % (model_codegen_dir, model_name), + output_dir) def create_internal_storage_dir(serialno, phone_data_dir): @@ -577,6 +559,7 @@ def create_internal_storage_dir(serialno, phone_data_dir): def tuning_run(abi, serialno, + mace_run_dir, vlog_level, embed_model_data, model_output_dir, @@ -593,33 +576,31 @@ def tuning_run(abi, tuning, out_of_range_check, phone_data_dir, + build_type, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, - runtime_failure_ratio=0.0, - valgrind=False, - valgrind_path="/data/local/tmp/valgrind", - valgrind_args="", input_file_name="model_input", - output_file_name="model_out"): + output_file_name="model_out", + runtime_failure_ratio=0.0, + address_sanitizer=False): print("* Run '%s' with round=%s, restart_round=%s, tuning=%s, " "out_of_range_check=%s, omp_num_threads=%s, cpu_affinity_policy=%s, " "gpu_perf_hint=%s, gpu_priority_hint=%s" % (model_tag, running_round, restart_round, str(tuning), str(out_of_range_check), omp_num_threads, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint)) + mace_model_path = "" + if build_type == BuildType.proto: + mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) if abi == "host": - if mace_model_dir: - mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) - else: - mace_model_path = "" p = subprocess.Popen( [ "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, - "%s/mace_run" % model_output_dir, + "%s/mace_run" % mace_run_dir, "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), @@ -627,7 +608,7 @@ def tuning_run(abi, "--output_shape=%s" % ":".join(output_shapes), "--input_file=%s/%s" % (model_output_dir, input_file_name), "--output_file=%s/%s" % (model_output_dir, output_file_name), - "--model_data_file=%s/%s.data" % (model_output_dir, model_tag), + "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag), "--device=%s" % device_type, "--round=%s" % running_round, "--restart_round=%s" % restart_round, @@ -654,21 +635,25 @@ def tuning_run(abi, input_name) adb_push("%s/%s" % (model_output_dir, formatted_name), phone_data_dir, serialno) - adb_push("%s/mace_run" % model_output_dir, phone_data_dir, - serialno) + if address_sanitizer: + adb_push(find_asan_rt_library(abi), phone_data_dir, serialno) + if not embed_model_data: - adb_push("%s/%s.data" % (model_output_dir, model_tag), + adb_push("%s/%s.data" % (mace_model_dir, model_tag), phone_data_dir, serialno) + adb_push("third_party/nnlib/libhexagon_controller.so", phone_data_dir, serialno) - if mace_model_dir: - mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag) - adb_push("%s/%s.pb" % (mace_model_dir, model_tag), - mace_model_path, + mace_model_phone_path = "" + if build_type == BuildType.proto: + mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag) + adb_push(mace_model_path, + mace_model_phone_path, serialno) - else: - mace_model_path = "" + + adb_push("%s/mace_run" % mace_run_dir, phone_data_dir, + serialno) stdout_buff = [] process_output = make_output_processor(stdout_buff) @@ -682,11 +667,10 @@ def tuning_run(abi, "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, ] - if valgrind: + if address_sanitizer: adb_cmd.extend([ - "VALGRIND_LIB=%s" % valgrind_path + "/lib/valgrind", - valgrind_path + "/bin/valgrind", - valgrind_args + "LD_PRELOAD=%s/%s" % (phone_data_dir, + asan_rt_library_names(abi)) ]) adb_cmd.extend([ "%s/mace_run" % phone_data_dir, @@ -705,7 +689,7 @@ def tuning_run(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, - "--model_file=%s" % mace_model_path, + "--model_file=%s" % mace_model_phone_path, ]) adb_cmd = ' '.join(adb_cmd) p = sh.adb( @@ -756,7 +740,6 @@ def validate_model(abi, elif platform == "caffe": image_name = "mace-caffe:latest" container_name = "mace_caffe_validator" - res_file = "validation.result" if caffe_env == common.CaffeEnvType.LOCAL: import imp @@ -843,60 +826,51 @@ def validate_model(abi, print("Validation done!\n") -def build_production_code(model_load_type, abi): +def build_host_libraries(model_build_type, abi): + bazel_build("@com_google_protobuf//:protobuf_lite", abi=abi) + bazel_build("//mace/proto:mace_cc", abi=abi) bazel_build("//mace/codegen:generated_opencl", abi=abi) bazel_build("//mace/codegen:generated_tuning_params", abi=abi) - if abi == 'host': - if model_load_type == "source": - bazel_build( - "//mace/codegen:generated_models", - abi=abi) - else: - bazel_build("//mace/core:core", abi=abi) - bazel_build("//mace/ops:ops", abi=abi) + bazel_build("//mace/codegen:generated_version", abi=abi) + bazel_build("//mace/utils:utils", abi=abi) + bazel_build("//mace/core:core", abi=abi) + bazel_build("//mace/kernels:kernels", abi=abi) + bazel_build("//mace/ops:ops", abi=abi) + if model_build_type == BuildType.code: + bazel_build( + "//mace/codegen:generated_models", + abi=abi) def merge_libs(target_soc, abi, project_name, - libmace_output_dir, - model_output_dirs, - mace_model_dirs_kv, - model_load_type, - hexagon_mode, - embed_model_data): + build_output_dir, + library_output_dir, + model_build_type, + hexagon_mode): print("* Merge mace lib") - project_output_dir = "%s/%s" % (libmace_output_dir, project_name) + project_output_dir = "%s/%s" % (build_output_dir, project_name) model_header_dir = "%s/include/mace/public" % project_output_dir - model_data_dir = "%s/data" % project_output_dir hexagon_lib_file = "third_party/nnlib/libhexagon_controller.so" - model_bin_dir = "%s/%s/" % (project_output_dir, abi) - - if not os.path.exists(model_bin_dir): - sh.mkdir("-p", model_bin_dir) - if not os.path.exists(model_header_dir): - sh.mkdir("-p", model_header_dir) + model_bin_dir = "%s/%s/%s/" % (project_output_dir, library_output_dir, abi) + + if os.path.exists(model_bin_dir): + sh.rm("-rf", model_bin_dir) + sh.mkdir("-p", model_bin_dir) + if os.path.exists(model_header_dir): + sh.rm("-rf", model_header_dir) + sh.mkdir("-p", model_header_dir) + # copy header files sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) - if not os.path.exists(model_data_dir): - sh.mkdir("-p", model_data_dir) if hexagon_mode: sh.cp("-f", hexagon_lib_file, model_bin_dir) - if model_load_type == "source": + if model_build_type == BuildType.code: sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) + sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), model_header_dir) - for model_output_dir in model_output_dirs: - if not embed_model_data: - sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), - model_data_dir) - if model_load_type == "source": - sh.cp("-f", glob.glob("%s/*.h" % model_output_dir), - model_header_dir) - - for model_name in mace_model_dirs_kv: - sh.cp("-f", "%s/%s.pb" % (mace_model_dirs_kv[model_name], model_name), - model_data_dir) - + # make static library mri_stream = "" if abi == "host": mri_stream += "create %s/libmace_%s.a\n" % \ @@ -907,21 +881,39 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") - if model_load_type == "source": + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_version.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/core/libcore.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/kernels/libkernels.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/utils/libutils.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/proto/libmace_cc.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/external/com_google_protobuf/libprotobuf_lite.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/ops/libops.pic.lo\n") + if model_build_type == BuildType.code: mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") - else: - mri_stream += ( - "addlib " - "bazel-bin/mace/core/libcore.pic.a\n") - mri_stream += ( - "addlib " - "bazel-bin/mace/ops/libops.pic.lo\n") else: - mri_stream += "create %s/libmace_%s.%s.a\n" % \ - (model_bin_dir, project_name, target_soc) - if model_load_type == "source": + if not target_soc: + mri_stream += "create %s/libmace_%s.a\n" % \ + (model_bin_dir, project_name) + else: + mri_stream += "create %s/libmace_%s.%s.a\n" % \ + (model_bin_dir, project_name, target_soc) + if model_build_type == BuildType.code: mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_models.a\n") @@ -943,9 +935,6 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/utils/libutils.a\n") - mri_stream += ( - "addlib " - "bazel-bin/mace/utils/libutils_prod.a\n") mri_stream += ( "addlib " "bazel-bin/mace/proto/libmace_cc.a\n") @@ -985,7 +974,7 @@ def packaging_lib(libmace_output_dir, project_name): "%s" % tar_package_path, glob.glob("%s/*" % project_dir), "--exclude", - "%s/build" % project_dir, + "%s/_tmp" % project_dir, _out=process_output, _bg=True, _err_to_out=True) @@ -994,21 +983,15 @@ def packaging_lib(libmace_output_dir, project_name): def build_benchmark_model(abi, - embed_model_data, model_output_dir, - model_tag, hexagon_mode): benchmark_binary_file = "%s/benchmark_model" % model_output_dir if os.path.exists(benchmark_binary_file): sh.rm("-rf", benchmark_binary_file) - if not embed_model_data: - sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), - model_output_dir) benchmark_target = "//mace/benchmark:benchmark_model" bazel_build(benchmark_target, abi=abi, - production_mode=True, hexagon_mode=hexagon_mode) target_bin = "/".join(bazel_target_to_bin(benchmark_target)) @@ -1017,6 +1000,7 @@ def build_benchmark_model(abi, def benchmark_model(abi, serialno, + benchmark_binary_dir, vlog_level, embed_model_data, model_output_dir, @@ -1028,6 +1012,7 @@ def benchmark_model(abi, model_tag, device_type, phone_data_dir, + build_type, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, @@ -1037,23 +1022,22 @@ def benchmark_model(abi, stdout_buff = [] process_output = make_output_processor(stdout_buff) + mace_model_path = "" + if build_type == BuildType.proto: + mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) if abi == "host": - if mace_model_dir: - mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) - else: - mace_model_path = "" p = subprocess.Popen( [ "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, - "%s/benchmark_model" % model_output_dir, + "%s/benchmark_model" % benchmark_binary_dir, "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), "--input_shape=%s" % ":".join(input_shapes), "--output_shape=%s" % ":".join(output_shapes), "--input_file=%s/%s" % (model_output_dir, input_file_name), - "--model_data_file=%s/%s.data" % (model_output_dir, model_tag), + "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag), "--device=%s" % device_type, "--omp_num_threads=%s" % omp_num_threads, "--cpu_affinity_policy=%s" % cpu_affinity_policy, @@ -1072,18 +1056,17 @@ def benchmark_model(abi, input_name) adb_push("%s/%s" % (model_output_dir, formatted_name), phone_data_dir, serialno) - adb_push("%s/benchmark_model" % model_output_dir, phone_data_dir, - serialno) if not embed_model_data: - adb_push("%s/%s.data" % (model_output_dir, model_tag), + adb_push("%s/%s.data" % (mace_model_dir, model_tag), phone_data_dir, serialno) - if mace_model_dir: - mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag) - adb_push("%s/%s.pb" % (mace_model_dir, model_tag), - mace_model_path, + mace_model_phone_path = "" + if build_type == BuildType.proto: + mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag) + adb_push(mace_model_path, + mace_model_phone_path, serialno) - else: - mace_model_path = "" + adb_push("%s/benchmark_model" % benchmark_binary_dir, phone_data_dir, + serialno) p = sh.adb( "-s", @@ -1108,7 +1091,7 @@ def benchmark_model(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, - "--model_file=%s" % mace_model_path, + "--model_file=%s" % mace_model_phone_path, _out=process_output, _bg=True, _err_to_out=True) diff --git a/tools/validate.py b/tools/validate.py index 9cf89394..3864519d 100644 --- a/tools/validate.py +++ b/tools/validate.py @@ -36,6 +36,8 @@ import common # --input_shape 1,64,64,3 \ # --output_shape 1,64,64,2 +VALIDATION_MODULE = 'VALIDATION' + def load_data(file): if os.path.isfile(file): @@ -51,18 +53,21 @@ def compare_output(platform, device_type, output_name, mace_out_value, mace_out_value = mace_out_value.reshape(-1) assert len(out_value) == len(mace_out_value) similarity = (1 - spatial.distance.cosine(out_value, mace_out_value)) - print output_name, 'MACE VS', platform.upper( - ), 'similarity: ', similarity + common.MaceLogger.summary( + output_name + ' MACE VS ' + platform.upper() + + ' similarity: ' + str(similarity)) if (device_type == "CPU" and similarity > 0.999) or \ (device_type == "GPU" and similarity > 0.995) or \ (device_type == "HEXAGON" and similarity > 0.930): - print '===================Similarity Test Passed==================' + common.MaceLogger.summary( + common.StringFormatter.block("Similarity Test Passed")) else: - print '===================Similarity Test Failed==================' - sys.exit(-1) + common.MaceLogger.error( + "", common.StringFormatter.block("Similarity Test Failed")) else: - print '=======================Skip empty node===================' - sys.exit(-1) + common.MaceLogger.error( + "", common.StringFormatter.block( + "Similarity Test failed because of empty output")) def normalize_tf_tensor_name(name): @@ -76,8 +81,9 @@ def validate_tf_model(platform, device_type, model_file, input_file, mace_out_file, input_names, input_shapes, output_names): import tensorflow as tf if not os.path.isfile(model_file): - print("Input graph file '" + model_file + "' does not exist!") - sys.exit(-1) + common.MaceLogger.error( + VALIDATION_MODULE, + "Input graph file '" + model_file + "' does not exist!") tf.reset_default_graph() input_graph_def = tf.GraphDef() @@ -118,11 +124,13 @@ def validate_caffe_model(platform, device_type, model_file, input_file, os.environ['GLOG_minloglevel'] = '1' # suprress Caffe verbose prints import caffe if not os.path.isfile(model_file): - print("Input graph file '" + model_file + "' does not exist!") - sys.exit(-1) + common.MaceLogger.error( + VALIDATION_MODULE, + "Input graph file '" + model_file + "' does not exist!") if not os.path.isfile(weight_file): - print("Input weight file '" + weight_file + "' does not exist!") - sys.exit(-1) + common.MaceLogger.error( + VALIDATION_MODULE, + "Input weight file '" + weight_file + "' does not exist!") caffe.set_mode_cpu() -- GitLab