From c761a7baa60f19593ebc4d573d566c8ec7a4fb0a Mon Sep 17 00:00:00 2001 From: yejianwu Date: Mon, 21 May 2018 14:51:13 +0800 Subject: [PATCH] remove mace.pb.h deps in public api header, Reserve() for pb resource --- mace/benchmark/model_throughput_test.cc | 20 ++--- mace/benchmark/statistics.cc | 1 - mace/core/mace.cc | 9 ++- mace/core/runtime/cpu/cpu_runtime.cc | 2 + mace/proto/mace.proto | 6 -- mace/public/BUILD | 3 - mace/public/mace.h | 13 ++-- mace/python/tools/converter.py | 26 +++---- .../tools/converter_tool/base_converter.py | 8 +- .../tools/converter_tool/transformer.py | 17 +++-- .../python/tools/mace_engine_factory.h.jinja2 | 6 +- mace/python/tools/model.jinja2 | 75 +++++++++++-------- mace/python/tools/operator.jinja2 | 21 +++++- mace/python/tools/tensor_data.jinja2 | 6 ++ mace/python/tools/tensor_source.jinja2 | 1 + mace/test/mace_api_mt_test.cc | 28 ++++--- mace/test/mace_api_test.cc | 27 ++++--- mace/utils/BUILD | 1 - mace/utils/logging.h | 1 - tools/sh_commands.py | 5 -- 20 files changed, 151 insertions(+), 125 deletions(-) diff --git a/mace/benchmark/model_throughput_test.cc b/mace/benchmark/model_throughput_test.cc index fdcc03a0..fd19be5c 100644 --- a/mace/benchmark/model_throughput_test.cc +++ b/mace/benchmark/model_throughput_test.cc @@ -42,11 +42,11 @@ namespace mace { #ifdef MACE_CPU_MODEL_TAG namespace MACE_CPU_MODEL_TAG { -extern const unsigned char *LoadModelData(const std::string &model_data_file); +extern const unsigned char *LoadModelData(const char *model_data_file); extern void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(); +extern NetDef CreateNet(const unsigned char *model_data); extern const std::string ModelChecksum(); @@ -60,7 +60,7 @@ extern const unsigned char *LoadModelData(const char *model_data_file); extern void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(); +extern NetDef CreateNet(const unsigned char *model_data); extern const std::string ModelChecksum(); @@ -74,7 +74,7 @@ extern const unsigned char *LoadModelData(const char *model_data_file); extern void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(); +extern NetDef CreateNet(const unsigned char *model_data); extern const std::string ModelChecksum(); @@ -255,10 +255,10 @@ int Main(int argc, char **argv) { const unsigned char *cpu_model_data = mace::MACE_CPU_MODEL_TAG::LoadModelData( FLAGS_cpu_model_data_file.c_str()); - NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(); + NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(cpu_model_data); mace::MaceEngine cpu_engine(&cpu_net_def, DeviceType::CPU, input_names, - output_names, cpu_model_data); + output_names); LOG(INFO) << "CPU Warm up run"; t0 = NowMicros(); @@ -273,10 +273,10 @@ int Main(int argc, char **argv) { const unsigned char *gpu_model_data = mace::MACE_GPU_MODEL_TAG::LoadModelData( FLAGS_gpu_model_data_file.c_str()); - NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(); + NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data); mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::GPU, input_names, - output_names, gpu_model_data); + output_names); mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data); LOG(INFO) << "GPU Warm up run"; @@ -292,10 +292,10 @@ int Main(int argc, char **argv) { const unsigned char *dsp_model_data = mace::MACE_DSP_MODEL_TAG::LoadModelData( FLAGS_dsp_model_data_file.c_str()); - NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet(); + NetDef dsp_net_def = 
mace::MACE_DSP_MODEL_TAG::CreateNet(dsp_model_data); mace::MaceEngine dsp_engine(&dsp_net_def, DeviceType::HEXAGON, input_names, - output_names, dsp_model_data); + output_names); mace::MACE_DSP_MODEL_TAG::UnloadModelData(dsp_model_data); LOG(INFO) << "DSP Warm up run"; diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc index c221c3f3..ddc1c058 100644 --- a/mace/benchmark/statistics.cc +++ b/mace/benchmark/statistics.cc @@ -17,7 +17,6 @@ #include #include "mace/kernels/conv_pool_2d_util.h" -#include "mace/proto/mace.pb.h" #include "mace/utils/logging.h" #include "mace/utils/string_util.h" diff --git a/mace/core/mace.cc b/mace/core/mace.cc index bdf1fa89..e9e7eb87 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -323,11 +323,11 @@ MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file, return MaceStatus::MACE_INVALID_ARGS; } - NetDef net_def; - net_def.ParseFromArray(&model_pb[0], model_pb.size()); + std::shared_ptr net_def(new NetDef()); + net_def->ParseFromArray(&model_pb[0], model_pb.size()); index_t model_data_size = 0; - for (auto &const_tensor : net_def.tensors()) { + for (auto &const_tensor : net_def->tensors()) { model_data_size = std::max( model_data_size, static_cast(const_tensor.offset() + @@ -340,7 +340,8 @@ MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file, model_data = LoadModelData(model_data_file, model_data_size); engine->reset(new mace::MaceEngine(device_type)); - status = (*engine)->Init(&net_def, input_nodes, output_nodes, model_data); + status = (*engine)->Init( + net_def.get(), input_nodes, output_nodes, model_data); if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { UnloadModelData(model_data, model_data_size); diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 23de6795..1a070c8f 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -27,6 +27,7 @@ #include #include +#include "mace/core/macros.h" #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" @@ -135,6 +136,7 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, << ", CPU core IDs: " << MakeString(cpu_ids); omp_set_num_threads(omp_num_threads); #else + MACE_UNUSED(omp_num_threads); LOG(WARNING) << "Set OpenMP threads number failed: OpenMP not enabled."; #endif diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 934c63ce..393067dd 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -9,12 +9,6 @@ enum NetMode { NORMAL = 1; } -enum DeviceType { - CPU = 0; // In default, we will use CPU. 
- GPU = 2; - HEXAGON = 3; -} - enum DataType { DT_INVALID = 0; diff --git a/mace/public/BUILD b/mace/public/BUILD index d0fe0d94..3669d595 100644 --- a/mace/public/BUILD +++ b/mace/public/BUILD @@ -14,7 +14,4 @@ cc_library( "mace_runtime.h", ], copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], - deps = [ - "//mace/proto:mace_cc", - ], ) diff --git a/mace/public/mace.h b/mace/public/mace.h index 46cf4ac3..edc40153 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -24,10 +24,13 @@ #include #include -#include "mace/proto/mace.pb.h" - namespace mace { +class OutputShape; +class NetDef; + +enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; + struct CallStats { int64_t start_micros; int64_t end_micros; @@ -111,12 +114,6 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine &) = delete; }; -const unsigned char *LoadModelData(const std::string &model_data_file, - const size_t &data_size); - -void UnloadModelData(const unsigned char *model_data, - const size_t &data_size); - MaceStatus CreateMaceEngineFromPB(const std::string &model_data_file, const std::vector &input_nodes, const std::vector &output_nodes, diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index 4f65e971..8468847d 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -36,13 +36,13 @@ from mace.python.tools.convert_util import mace_check FLAGS = None -device_type_map = {'cpu': mace_pb2.CPU, - 'gpu': mace_pb2.GPU, - 'dsp': mace_pb2.HEXAGON} +device_type_map = {'cpu': cvt.DeviceType.CPU.value, + 'gpu': cvt.DeviceType.GPU.value, + 'dsp': cvt.DeviceType.HEXAGON.value} device_data_type_map = { - mace_pb2.CPU: mace_pb2.DT_FLOAT, - mace_pb2.GPU: mace_pb2.DT_HALF, - mace_pb2.HEXAGON: mace_pb2.DT_UINT8 + cvt.DeviceType.CPU.value: mace_pb2.DT_FLOAT, + cvt.DeviceType.GPU.value: mace_pb2.DT_HALF, + cvt.DeviceType.HEXAGON.value: mace_pb2.DT_UINT8 } @@ -131,8 +131,8 @@ def main(unused_args): print("Transform model to one that can better run on device") if not FLAGS.runtime: cpu_graph_def = copy.deepcopy(output_graph_def) - option.device = mace_pb2.CPU - option.data_type = device_data_type_map[mace_pb2.CPU] + option.device = cvt.DeviceType.CPU.value + option.data_type = device_data_type_map[cvt.DeviceType.CPU.value] option.disable_transpose_filters() mace_cpu_transformer = transformer.Transformer( option, cpu_graph_def) @@ -141,8 +141,8 @@ def main(unused_args): memory_optimizer.optimize_cpu_memory(cpu_graph_def) print "CPU memory optimization done." 
- option.device = mace_pb2.GPU - option.data_type = device_data_type_map[mace_pb2.GPU] + option.device = cvt.DeviceType.GPU.value + option.data_type = device_data_type_map[cvt.DeviceType.GPU.value] option.enable_transpose_filters() mace_gpu_transformer = transformer.Transformer( option, output_graph_def) @@ -180,9 +180,9 @@ def main(unused_args): if FLAGS.model_load_type == 'pb': with open(FLAGS.pb_output, "wb") as f: f.write(output_graph_def.SerializeToString()) - with open(FLAGS.pb_output + '_txt', "wb") as f: - # output_graph_def.ClearField('tensors') - f.write(str(output_graph_def)) + # with open(FLAGS.pb_output + '_txt', "wb") as f: + # # output_graph_def.ClearField('tensors') + # f.write(str(output_graph_def)) print("Model conversion is completed.") diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 8819128a..709d1fee 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -18,6 +18,12 @@ from enum import Enum from mace.proto import mace_pb2 +class DeviceType(Enum): + CPU = 0 + GPU = 2 + HEXAGON = 3 + + class DataFormat(Enum): NHWC = 0 NCHW = 1 @@ -198,7 +204,7 @@ class ConverterOption(object): self._input_nodes = {} self._output_nodes = {} self._data_type = mace_pb2.DT_FLOAT - self._device = mace_pb2.CPU + self._device = DeviceType.CPU.value self._winograd_enabled = False self._transformer_option = [ TransformerRule.REMOVE_USELESS_RESHAPE_OP, diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 825dd64f..4b7e098d 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -18,14 +18,15 @@ import numpy as np from mace.proto import mace_pb2 from mace.python.tools.converter_tool import base_converter -from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import ActivationType -from mace.python.tools.converter_tool.base_converter import PaddingMode +from mace.python.tools.converter_tool.base_converter import ConverterUtil from mace.python.tools.converter_tool.base_converter import DataFormat +from mace.python.tools.converter_tool.base_converter import DeviceType +from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import FilterFormat -from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceKeyword -from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import MaceOp +from mace.python.tools.converter_tool.base_converter import PaddingMode from mace.python.tools.converter_tool.base_converter import TransformerRule from mace.python.tools.convert_util import mace_check @@ -114,7 +115,7 @@ class Transformer(base_converter.ConverterInterface): self._producer = {} self._target_data_format = DataFormat.NHWC - if self._option.device == mace_pb2.CPU: + if self._option.device == DeviceType.CPU.value: self._target_data_format = DataFormat.NCHW def run(self): @@ -488,7 +489,7 @@ class Transformer(base_converter.ConverterInterface): net = self._model filter_format = self.filter_format() - if self._option.device == mace_pb2.GPU: + if self._option.device == DeviceType.GPU.value: for op in net.op: if op.type == MaceOp.Conv2D.name \ and 
self.check_if_gpu_use_winograd_conv(op): @@ -809,7 +810,7 @@ class Transformer(base_converter.ConverterInterface): op.input[input_idx] = output_name def transform_buffer_image(self): - if self._option.device != mace_pb2.GPU: + if self._option.device != DeviceType.GPU.value: return False print("Transform buffer to image") @@ -935,7 +936,7 @@ class Transformer(base_converter.ConverterInterface): def transform_global_conv_to_fc(self): """Transform global conv to fc should be placed after transposing input/output and filter""" - if self._option.device == mace_pb2.GPU: + if self._option.device == DeviceType.GPU.value: return False net = self._model diff --git a/mace/python/tools/mace_engine_factory.h.jinja2 b/mace/python/tools/mace_engine_factory.h.jinja2 index 2e923d68..b1c7879a 100644 --- a/mace/python/tools/mace_engine_factory.h.jinja2 +++ b/mace/python/tools/mace_engine_factory.h.jinja2 @@ -33,7 +33,7 @@ extern const unsigned char *LoadModelData(const std::string &model_data_file); extern void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(); +extern const std::shared_ptr CreateNet(); extern const std::string ModelName(); extern const std::string ModelChecksum(); @@ -63,7 +63,7 @@ MaceStatus CreateMaceEngineFromCode( return MaceStatus::MACE_INVALID_ARGS; } const unsigned char * model_data = nullptr; - NetDef net_def; + std::shared_ptr net_def; MaceStatus status = MaceStatus::MACE_SUCCESS; switch (model_name_map[model_name]) { {% for i in range(model_tags |length) %} @@ -72,7 +72,7 @@ MaceStatus CreateMaceEngineFromCode( mace::{{model_tags[i]}}::LoadModelData(model_data_file); net_def = mace::{{model_tags[i]}}::CreateNet(); engine->reset(new mace::MaceEngine(device_type)); - status = (*engine)->Init(&net_def, input_nodes, output_nodes, model_data); + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, model_data); if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { mace::{{model_tags[i]}}::UnloadModelData(model_data); } diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2 index 122404cc..ea57053e 100644 --- a/mace/python/tools/model.jinja2 +++ b/mace/python/tools/model.jinja2 @@ -18,6 +18,7 @@ #include #include "mace/core/macros.h" +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" @@ -40,33 +41,37 @@ extern void CreateOperator{{i}}(mace::OperatorDef *op); namespace { {% if net.arg|length != 0 %} -void CreateNetArg(mace::NetDef &net_def) { +void CreateNetArg(NetDef *net_def) { + net_def->mutable_arg()->Reserve({{ net.arg|length }}); mace::Argument *arg = nullptr; - {% for arg in net.arg %} + {% for i in range(net.arg|length) %} - arg = net_def.add_arg(); - arg->set_name({{ arg.name|tojson }}); + arg = net_def->add_arg(); + arg->set_name({{ net.arg[i].name|tojson }}); - {%- if arg.HasField('f') %} - arg->set_f({{ arg.f }}); + {%- if net.arg[i].HasField('f') %} + arg->set_f({{ net.arg[i].f }}); {% endif %} - {%- if arg.HasField('i') %} - arg->set_i({{ arg.i }}); + {%- if net.arg[i].HasField('i') %} + arg->set_i({{ net.arg[i].i }}); {% endif %} - {%- if arg.HasField('s') %} - arg->set_s({{ arg.s|tojson }}); + {%- if net.arg[i].HasField('s') %} + arg->set_s({{ net.arg[i].s|tojson }}); {% endif %} - {% for float_value in arg.floats %} - arg->add_floats({ {{ float_value }} }); + arg->mutable_floats()->Reserve({{ net.arg[i].floats|length }}); + {% for float_value in net.arg[i].floats %} + arg->add_floats({{ float_value }}); {% endfor 
%} - {% for int_value in arg.ints %} - arg->add_ints({ {{ int_value }} }); + arg->mutable_ints()->Reserve({{ net.arg[i].ints|length }}); + {% for int_value in net.arg[i].ints %} + arg->add_ints({{ int_value }}); {% endfor %} - {% for str_value in arg.strings %} - arg->add_strings({ {{ str_value }} }); + arg->mutable_strings()->Reserve({{ net.arg[i].strings|length }}); + {% for str_value in net.arg[i].strings %} + arg->add_strings({{ str_value }}); {% endfor %} {% endfor %} @@ -74,7 +79,7 @@ void CreateNetArg(mace::NetDef &net_def) { {% endif %} {% if net.output_info | length > 0 %} -void CreateOutputInfo(mace::NetDef &net_def) { +void CreateOutputInfo(NetDef *net_def) { std::vector> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} }; std::vector data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} }; @@ -82,27 +87,31 @@ void CreateOutputInfo(mace::NetDef &net_def) { for (int k = 0; k < {{ net.output_info | length }}; ++k) { data_types[k] = static_cast(data_types_int[k]); } + net_def->mutable_output_info()->Reserve({{ net.output_info | length }}); for (int i = 0; i < {{ net.output_info | length }}; ++i) { - net_def.add_output_info(); - net_def.mutable_output_info(i)->set_data_type(data_types[i]); - for (int j = 0; j < (int)dims[i].size(); ++j) { - net_def.mutable_output_info(i)->add_dims(dims[i][j]); + auto output_info = net_def->add_output_info(); + output_info->set_data_type(data_types[i]); + output_info->mutable_dims()->Reserve(dims[i].size()); + for (size_t j = 0; j < dims[i].size(); ++j) { + output_info->add_dims(dims[i][j]); } } } {% endif %} -void CreateOperators(mace::NetDef *net_def) { +void CreateOperators(NetDef *net_def) { MACE_LATENCY_LOGGER(1, "Create operators"); + net_def->mutable_op()->Reserve({{ net.op|length }}); {% for i in range(net.op|length) %} mace::{{tag}}::CreateOperator{{i}}(net_def->add_op()); {% endfor %} } -void CreateTensors(mace::NetDef *net_def) { +void CreateTensors(NetDef *net_def) { MACE_LATENCY_LOGGER(1, "Create tensors"); + net_def->mutable_tensors()->Reserve({{ tensors|length }}); {% for tensor in tensors %} mace::{{tag}}::CreateTensor{{tensor.id}}(net_def->add_tensors()); {% endfor %} @@ -110,6 +119,7 @@ void CreateTensors(mace::NetDef *net_def) { {% if net.mem_arena.mem_block|length != 0 %} void CreateMemoryArena(mace::MemoryArena *mem_arena) { + mem_arena->mutable_mem_block()->Reserve({{ net.mem_arena.mem_block|length }}); {% for i in range(net.mem_arena.mem_block|length) %} mace::MemoryBlock* mem_block{{i}} = mem_arena->add_mem_block(); @@ -127,25 +137,24 @@ void CreateMemoryArena(mace::MemoryArena *mem_arena) { namespace {{tag}} { -NetDef CreateNet() { +const std::shared_ptr CreateNet() { MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}"); - NetDef net_def; + std::shared_ptr net_def(new NetDef()); + net_def->set_name("{{ net.name}}"); + net_def->set_version("{{ net.version }}"); - net_def.set_name("{{ net.name}}"); - net_def.set_version("{{ net.version }}"); - - CreateOperators(&net_def); - CreateTensors(&net_def); + CreateOperators(net_def.get()); + CreateTensors(net_def.get()); {% if net.arg|length != 0 %} - CreateNetArg(net_def); + CreateNetArg(net_def.get()); {% endif %} {% if net.mem_arena.mem_block|length != 0 %} - CreateMemoryArena(net_def.mutable_mem_arena()); + CreateMemoryArena(net_def->mutable_mem_arena()); {% endif %} {% if net.output_info | length > 0 %} - CreateOutputInfo(net_def); + CreateOutputInfo(net_def.get()); {% endif %} return net_def; diff 
--git a/mace/python/tools/operator.jinja2 b/mace/python/tools/operator.jinja2 index fb1a0cc9..bd7e36eb 100644 --- a/mace/python/tools/operator.jinja2 +++ b/mace/python/tools/operator.jinja2 @@ -17,6 +17,7 @@ #include #include +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" @@ -36,15 +37,19 @@ void UpdateOp(mace::OperatorDef *op, op->set_type(type); op->set_node_id(node_id); + op->mutable_input()->Reserve(inputs.size()); for (auto input : inputs) { op->add_input(input); } + op->mutable_output()->Reserve(outputs.size()); for (auto output : outputs) { op->add_output(output); } + op->mutable_output_type()->Reserve(output_types.size()); for (auto output_type : output_types) { op->add_output_type(output_type); } + op->mutable_mem_id()->Reserve(mem_ids.size()); for (auto mem_id : mem_ids) { op->add_mem_id(mem_id); } @@ -62,6 +67,7 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { MACE_LATENCY_LOGGER(2, "Create operator {{ net.op[i].name }}"); mace::Argument *arg = nullptr; + op->mutable_arg()->Reserve({{ net.op[i].arg|length }}); {% for arg in net.op[i].arg %} arg = op->add_arg(); @@ -77,23 +83,28 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { arg->set_s({{ arg.s|tojson }}); {%- endif %} + arg->mutable_floats()->Reserve({{ arg.floats|length }}); {% for float_value in arg.floats %} arg->add_floats({{ float_value }}); {% endfor %} + arg->mutable_ints()->Reserve({{ arg.ints|length }}); {% for int_value in arg.ints %} arg->add_ints({{ int_value }}); {% endfor %} + arg->mutable_strings()->Reserve({{ arg.strings|length }}); {% for str_value in arg.strings %} arg->add_strings({{ str_value }}); {% endfor %} {% endfor %} - {% for shape in net.op[i].output_shape %} - {% if shape.dims | length > 0 %} + op->mutable_output_shape()->Reserve({{ net.op[i].output_shape|length }}); mace::OutputShape * output_shape = nullptr; + {% for shape in net.op[i].output_shape %} + {% if shape.dims|length > 0 %} output_shape = op->add_output_shape(); + output_shape->mutable_dims()->Reserve({{ shape.dims|length }}); {% for dim in shape.dims %} output_shape->add_dims({{ dim }}); {% endfor %} @@ -120,7 +131,8 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { std::vector input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} }); mace::NodeInput *node_input = nullptr; - for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) { + op->mutable_node_input()->Reserve({{ net.op[i].node_input|length }}); + for (size_t i = 0; i < {{ net.op[i].node_input|length }}; ++i) { node_input = op->add_node_input(); node_input->set_node_id(input_node_ids[i]); node_input->set_output_port(input_output_ports[i]); @@ -129,7 +141,8 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { {% if net.op[i].out_max_byte_size | length > 0 %} std::vector out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }}; - for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) { + op->mutable_out_max_byte_size()->Reserve({{ net.op[i].out_max_byte_size|length }}); + for (size_t i = 0; i < {{ net.op[i].out_max_byte_size|length }}; ++i) { op->add_out_max_byte_size(out_max_byte_sizes[i]); } {% endif %} diff --git a/mace/python/tools/tensor_data.jinja2 b/mace/python/tools/tensor_data.jinja2 index 5520ce05..ce3ceb81 100644 --- a/mace/python/tools/tensor_data.jinja2 +++ b/mace/python/tools/tensor_data.jinja2 @@ -23,6 +23,12 @@ #include "mace/utils/logging.h" namespace mace { + +extern const 
unsigned char *LoadModelData(const std::string &model_data_file, + const size_t &data_size); +extern void UnloadModelData(const unsigned char *model_data, + const size_t &data_size); + namespace {{tag}} { {% if embed_model_data %} diff --git a/mace/python/tools/tensor_source.jinja2 b/mace/python/tools/tensor_source.jinja2 index e97e46f5..e982e41f 100644 --- a/mace/python/tools/tensor_source.jinja2 +++ b/mace/python/tools/tensor_source.jinja2 @@ -17,6 +17,7 @@ #include #include +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index 5d2b018c..ab4317d4 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -69,6 +69,7 @@ void BufferToImage(const std::string &input_name, .AddIntArg("mode", mode) .Finalize(&operator_def); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); for (auto mem_id : mem_ids) { operator_def.add_mem_id(mem_id); } @@ -113,6 +114,7 @@ void Conv3x3(const std::string &input_name, .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); for (auto mem_id : mem_ids) { operator_def.add_mem_id(mem_id); } @@ -144,6 +146,7 @@ void AddTensor(const std::string &name, NetDef *net_def) { ConstTensor *tensor_ptr = net_def->add_tensors(); tensor_ptr->set_name(name); + tensor_ptr->mutable_dims()->Reserve(shape.size()); for (auto dim : shape) { tensor_ptr->add_dims(dim); } @@ -225,8 +228,10 @@ std::map AddMemoryOptimization( input_shapes[i][1]); } size_t input_size = input_names.size(); + size_t output_size = output_names.size(); + MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); + mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size); for (size_t i = 0; i < input_size; ++i) { - MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); mem_blk_ptr->set_mem_id(mem_id); mem_blk_ptr->set_x(in_mem_block_x); @@ -245,9 +250,7 @@ std::map AddMemoryOptimization( output_shapes[i][0] * output_shapes[i][1]); } - size_t output_size = output_names.size(); for (size_t i = 0; i < output_size; ++i) { - MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); mem_blk_ptr->set_mem_id(mem_id); mem_blk_ptr->set_x(out_mem_block_x); @@ -275,16 +278,17 @@ void MaceRunFunc(const int in_out_size) { const std::vector> output_shapes = {{1, 32, 32, 16}}; const std::vector filter_shape = {16, 16, 3, 3}; - NetDef net_def; + std::shared_ptr net_def(new NetDef()); // Add memory optimization auto mem_map = AddMemoryOptimization(input_names, output_names, input_shapes, output_shapes, - &net_def); + net_def.get()); std::vector data; ops::test::GenerateRandomRealTypeData(filter_shape, &data); - AddTensor(filter_tensor_name, filter_shape, 0, data.size(), &net_def); + AddTensor( + filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { std::string input_name = MakeString("mace_input_node_", @@ -293,16 +297,16 @@ void MaceRunFunc(const int in_out_size) { mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, device, - &net_def); + net_def.get()); } BufferToImage(filter_tensor_name, filter_tensor_img_name, mace::kernels::CONV2D_FILTER, {}, device, - &net_def, NetMode::INIT); + net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { 
Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, device, - &net_def); + net_def.get()); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", @@ -310,7 +314,7 @@ void MaceRunFunc(const int in_out_size) { ImageToBuffer(output_names[i], output_name, mace::kernels::IN_OUT_CHANNEL, device, - &net_def); + net_def.get()); } const std::string file_path ="/data/local/tmp/mace"; @@ -319,7 +323,7 @@ void MaceRunFunc(const int in_out_size) { mace::SetKVStorageFactory(storage_factory); MaceEngine engine(device); - MaceStatus status = engine.Init(&net_def, input_names, output_names, + MaceStatus status = engine.Init(net_def.get(), input_names, output_names, reinterpret_cast(data.data())); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); @@ -337,7 +341,7 @@ void MaceRunFunc(const int in_out_size) { } } - CheckOutputs(net_def, inputs, outputs, data); + CheckOutputs(*net_def, inputs, outputs, data); } } // namespace diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index df8270cb..f061ecc3 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -79,6 +79,7 @@ void BufferToImage(const std::string &input_name, .AddIntArg("mode", mode) .Finalize(&operator_def); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); for (auto mem_id : mem_ids) { operator_def.add_mem_id(mem_id); } @@ -124,6 +125,7 @@ void Conv3x3(const std::string &input_name, .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); for (auto mem_id : mem_ids) { operator_def.add_mem_id(mem_id); } @@ -155,6 +157,7 @@ void AddTensor(const std::string &name, NetDef *net_def) { ConstTensor *tensor_ptr = net_def->add_tensors(); tensor_ptr->set_name(name); + tensor_ptr->mutable_dims()->Reserve(shape.size()); for (auto dim : shape) { tensor_ptr->add_dims(dim); } @@ -236,8 +239,10 @@ std::map AddMemoryOptimization( input_shapes[i][1]); } size_t input_size = input_names.size(); + size_t output_size = output_names.size(); + MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); + mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size); for (size_t i = 0; i < input_size; ++i) { - MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); mem_blk_ptr->set_mem_id(mem_id); mem_blk_ptr->set_x(in_mem_block_x); @@ -256,9 +261,7 @@ std::map AddMemoryOptimization( output_shapes[i][0] * output_shapes[i][1]); } - size_t output_size = output_names.size(); for (size_t i = 0; i < output_size; ++i) { - MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); mem_blk_ptr->set_mem_id(mem_id); mem_blk_ptr->set_x(out_mem_block_x); @@ -286,16 +289,16 @@ void MaceRun(const int in_out_size, const DeviceType device = DeviceType::GPU; - NetDef net_def; + std::shared_ptr net_def(new NetDef()); // Add memory optimization auto mem_map = AddMemoryOptimization(input_names, output_names, input_shapes, output_shapes, - &net_def); + net_def.get()); std::vector data; ops::test::GenerateRandomRealTypeData(filter_shape, &data); - AddTensor(filter_tensor_name, filter_shape, 0, data.size(), &net_def); + AddTensor(filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { std::string input_name = MakeString("mace_input_node_", @@ -304,15 +307,15 @@ void MaceRun(const int in_out_size, 
mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, device, - &net_def); + net_def.get()); } BufferToImage(filter_tensor_name, filter_tensor_img_name, mace::kernels::CONV2D_FILTER, {}, device, - &net_def, NetMode::INIT); + net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, - device, &net_def); + device, net_def.get()); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", @@ -320,11 +323,11 @@ void MaceRun(const int in_out_size, ImageToBuffer(output_names[i], output_name, mace::kernels::IN_OUT_CHANNEL, device, - &net_def); + net_def.get()); } MaceEngine engine(device); - MaceStatus status = engine.Init(&net_def, input_names, output_names, + MaceStatus status = engine.Init(net_def.get(), input_names, output_names, reinterpret_cast(data.data())); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); @@ -342,7 +345,7 @@ void MaceRun(const int in_out_size, } } - CheckOutputs(net_def, inputs, outputs, data); + CheckOutputs(*net_def, inputs, outputs, data); } } // namespace diff --git a/mace/utils/BUILD b/mace/utils/BUILD index 140d1103..76e8864a 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -31,7 +31,6 @@ cc_library( copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/public", - "//mace/proto:mace_cc", ], ) diff --git a/mace/utils/logging.h b/mace/utils/logging.h index 34cf5d8f..1f540fa6 100644 --- a/mace/utils/logging.h +++ b/mace/utils/logging.h @@ -21,7 +21,6 @@ #include #include -#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/string_util.h" diff --git a/tools/sh_commands.py b/tools/sh_commands.py index a74cf864..4de17ff7 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -865,7 +865,6 @@ def merge_libs(target_soc, model_data_dir = "%s/data" % project_output_dir hexagon_lib_file = "mace/third_party/nnlib/libhexagon_controller.so" model_bin_dir = "%s/%s/" % (project_output_dir, abi) - mace_proto_dir = "%s/proto" % project_output_dir if not os.path.exists(model_bin_dir): sh.mkdir("-p", model_bin_dir) @@ -874,8 +873,6 @@ def merge_libs(target_soc, sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) if not os.path.exists(model_data_dir): sh.mkdir("-p", model_data_dir) - if not os.path.exists(mace_proto_dir): - sh.mkdir("-p", mace_proto_dir) if hexagon_mode: sh.cp("-f", hexagon_lib_file, model_bin_dir) @@ -894,8 +891,6 @@ def merge_libs(target_soc, sh.cp("-f", "%s/%s.pb" % (mace_model_dirs_kv[model_name], model_name), model_data_dir) - sh.cp("-f", "mace/proto/mace.proto", mace_proto_dir) - mri_stream = "" if abi == "host": mri_stream += "create %s/libmace_%s.a\n" % \ -- GitLab
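For callers of the generated code, the central change is that CreateNet() now hands back a std::shared_ptr<NetDef> and MaceEngine::Init() takes a raw NetDef pointer alongside the model data. Below is a minimal sketch of that flow, modeled on the mace_engine_factory.h.jinja2 template in this patch; the namespace some_model and the helper BuildEngine are placeholders for illustration, and the <NetDef> template argument on the shared_ptr is assumed.

    // Sketch of creating an engine from generated code after this patch.
    // "some_model" stands in for the per-model namespace emitted by the
    // code generator; the real name comes from the model tag.
    #include <memory>
    #include <string>
    #include <vector>

    #include "mace/public/mace.h"  // MaceEngine, DeviceType, MaceStatus;
                                   // NetDef is only forward-declared here,
                                   // which is all this caller needs.

    namespace mace {
    namespace some_model {
    // Declarations emitted by the generator (see mace_engine_factory.h.jinja2).
    extern const unsigned char *LoadModelData(const std::string &model_data_file);
    extern void UnloadModelData(const unsigned char *model_data);
    extern const std::shared_ptr<NetDef> CreateNet();
    }  // namespace some_model
    }  // namespace mace

    mace::MaceStatus BuildEngine(const std::string &model_data_file,
                                 const std::vector<std::string> &input_nodes,
                                 const std::vector<std::string> &output_nodes,
                                 mace::DeviceType device_type,
                                 std::shared_ptr<mace::MaceEngine> *engine) {
      const unsigned char *model_data =
          mace::some_model::LoadModelData(model_data_file);
      std::shared_ptr<mace::NetDef> net_def = mace::some_model::CreateNet();
      engine->reset(new mace::MaceEngine(device_type));
      mace::MaceStatus status =
          (*engine)->Init(net_def.get(), input_nodes, output_nodes, model_data);
      // Mirrors the factory template: GPU and HEXAGON release the model data
      // right after Init, while CPU keeps reading from it.
      if (device_type == mace::DeviceType::GPU ||
          device_type == mace::DeviceType::HEXAGON) {
        mace::some_model::UnloadModelData(model_data);
      }
      return status;
    }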
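The generated model sources also gain Reserve() calls on repeated protobuf fields before they are filled, so the subsequent add_*() calls do not reallocate incrementally. A small sketch of the same pattern on NetDef arguments, assuming the generated header mace/proto/mace.pb.h is available; FillNetArgs is an illustrative helper, not something this patch adds.

    #include <string>
    #include <utility>
    #include <vector>

    #include "mace/proto/mace.pb.h"  // NetDef, Argument

    // Pre-size the repeated "arg" field once, then append, mirroring the
    // Reserve() calls emitted by model.jinja2 and operator.jinja2.
    void FillNetArgs(mace::NetDef *net_def,
                     const std::vector<std::pair<std::string, float>> &args) {
      net_def->mutable_arg()->Reserve(static_cast<int>(args.size()));
      for (const auto &kv : args) {
        mace::Argument *arg = net_def->add_arg();
        arg->set_name(kv.first);
        arg->set_f(kv.second);
      }
    }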
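Finally, with OutputShape and NetDef forward-declared and DeviceType defined directly in mace/public/mace.h, application code can construct an engine without any protobuf headers on its include path. A minimal sketch under that assumption:

    #include <memory>

    #include "mace/public/mace.h"  // MaceEngine, DeviceType; no mace.pb.h needed

    std::shared_ptr<mace::MaceEngine> MakeGpuEngine() {
      // DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 } now lives in the public
      // header rather than in the generated proto header.
      return std::make_shared<mace::MaceEngine>(mace::DeviceType::GPU);
    }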