diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index d38448ec5cea4381646d0e5fb57bfe4a4e411011..4bbb48e0ec267d7f9e06fb078268e8a78f6cc11d 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -16,6 +16,7 @@ cc_library( srcs = ["statistics.cc"], hdrs = ["statistics.h"], linkstatic = 1, + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/kernels", "//mace/utils", @@ -27,7 +28,11 @@ cc_binary( srcs = [ "benchmark_model.cc", ], - copts = if_android(["-DMACE_ENABLE_OPENCL"]), + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + if_android(["-DMACE_ENABLE_OPENCL"]), linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, deps = [ @@ -51,6 +56,7 @@ cc_binary( srcs = ["model_throughput_test.cc"], linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ ":libmace_merged", "//external:gflags_nothreads", diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index 07a92d2efdd133ecb77c13b2297dac0ee5586e26..7317b8bef6447eb64b6c3ddb55ad0e4509abfc29 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -24,6 +24,7 @@ #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" +#include "mace/utils/utils.h" #include "mace/benchmark/statistics.h" #include "mace/codegen/engine/mace_engine_factory.h" @@ -189,6 +190,8 @@ DEFINE_string(max_time, "10.0", "length to run max"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); +DEFINE_string(model_file, "", + "model file name, used when load mace model in pb"); DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); @@ 
-202,6 +205,7 @@ int Main(int argc, char **argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
   LOG(INFO) << "Model name: [" << FLAGS_model_name << "]";
+  LOG(INFO) << "Model_file: " << FLAGS_model_file;
   LOG(INFO) << "Device: [" << FLAGS_device << "]";
   LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
   LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
@@ -268,22 +272,31 @@ int Main(int argc, char **argv) {
   std::shared_ptr engine;
   MaceStatus create_engine_status;
   // Create Engine
-  if (FLAGS_model_data_file.empty()) {
+  const char *model_data_file_ptr =
+    FLAGS_model_data_file.empty() ? nullptr : FLAGS_model_data_file.c_str();
+  if (FLAGS_model_file != "") {
+    // CreateMaceEngineFromProto takes the data file as `const std::string &`.
+    // Hand it the flag itself: model_data_file_ptr is null when the flag is
+    // empty, and building a std::string from null is undefined behaviour.
+    std::vector model_pb_data;
+    if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
+      LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
+    }
     create_engine_status =
-        CreateMaceEngine(FLAGS_model_name.c_str(),
-                         nullptr,
-                         input_names,
-                         output_names,
-                         device_type,
-                         &engine);
+        CreateMaceEngineFromProto(model_pb_data,
+                                  FLAGS_model_data_file,
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
   } else {
     create_engine_status =
-        CreateMaceEngine(FLAGS_model_name.c_str(),
-                         FLAGS_model_data_file.c_str(),
-                         input_names,
-                         output_names,
-                         device_type,
-                         &engine);
+        CreateMaceEngineFromCode(FLAGS_model_name,
+                                 model_data_file_ptr,
+                                 input_names,
+                                 output_names,
+                                 device_type,
+                                 &engine);
   }
   if (create_engine_status != MaceStatus::MACE_SUCCESS) {
     LOG(FATAL) << "Create engine error, please check the arguments";
diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc
index 278c88d7cd3a231a1ef3f6093c5c843e77c94526..ddc1c058aaabe996658dc0ffdfa4734347ffe1c6 100644
--- a/mace/benchmark/statistics.cc
+++ b/mace/benchmark/statistics.cc
@@ -17,7 +17,6 @@
 #include
 
 #include "mace/kernels/conv_pool_2d_util.h"
-#include "mace/public/mace_types.h"
 #include "mace/utils/logging.h"
 #include "mace/utils/string_util.h"
 
@@ -59,10 +58,10 @@ std::string
ShapeToString(const std::vector &output_shape) { std::stringstream stream; stream << "["; for (size_t i = 0; i < output_shape.size(); ++i) { - const std::vector &dims = output_shape[i].dims(); - for (size_t j = 0; j < dims.size(); ++j) { - stream << dims[j]; - if (j != dims.size() - 1) { + size_t dims_size = output_shape[i].dims_size(); + for (size_t j = 0; j < dims_size; ++j) { + stream << output_shape[i].dims(j); + if (j != dims_size - 1) { stream << ","; } } diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index be0978a72ad03235e53ab7e176b34e5081583bf4..16f09ac66904384c143d1d5826be3e6fdc44a2fa 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -10,6 +10,7 @@ cc_library( srcs = glob(["models/*/*.cc"]), hdrs = glob(["models/*/*.h"]), linkstatic = 1, + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/core", "//mace/ops", @@ -19,24 +20,28 @@ cc_library( cc_library( name = "generated_opencl", srcs = glob(["opencl/*.cc"]), + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], linkstatic = 1, ) cc_library( name = "generated_tuning_params", srcs = ["tuning/tuning_params.cc"], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], linkstatic = 1, ) cc_library( name = "generated_version", srcs = ["version/version.cc"], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], linkstatic = 1, ) cc_library( name = "generated_mace_engine_factory", hdrs = ["engine/mace_engine_factory.h"], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/public", ], diff --git a/mace/core/BUILD b/mace/core/BUILD index d1fdd7acb888fa2efa341955994d8b31c38f5137..9917dfe3254c11df195187511deb1f1afacd7df2 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -43,7 +43,11 @@ cc_library( "runtime/opencl/*.h", ], )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])), - copts = if_openmp_enabled([ + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + 
if_openmp_enabled([ "-fopenmp", "-DMACE_ENABLE_OPENMP", ]) + if_android([ @@ -58,8 +62,9 @@ cc_library( "-lm", ]), deps = [ - "//mace/utils", "//mace/codegen:generated_version", + "//mace/proto:mace_cc", + "//mace/utils", ] + if_android([ ":opencl_headers", "//mace/codegen:generated_opencl", @@ -79,6 +84,7 @@ cc_library( hdrs = glob([ "runtime/opencl/*.h", ]), + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "@opencl_clhpp//:opencl_clhpp", "@opencl_headers//:opencl20_headers", @@ -95,6 +101,7 @@ cc_library( hdrs = [ "testing/test_benchmark.h", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ ":core", "//external:gflags_nothreads", diff --git a/mace/core/arg_helper.h b/mace/core/arg_helper.h index 6b3cb1cc2314ff798f4f484e0bbc878a30258692..afbd3b331ca8148273a9474524bfa1926c612e33 100644 --- a/mace/core/arg_helper.h +++ b/mace/core/arg_helper.h @@ -19,8 +19,8 @@ #include #include +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" -#include "mace/public/mace_types.h" namespace mace { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 83ab4bd19ae48debffd5585fe9035a150ac24dac..b2a084548d334e53329cc5a4f6d4264771ea9ac0 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -12,6 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include +#include +#include +#include +#include + #include #include "mace/core/net.h" @@ -88,7 +94,8 @@ class MaceEngine::Impl { MaceStatus Init(const NetDef *net_def, const std::vector &input_nodes, - const std::vector &output_nodes); + const std::vector &output_nodes, + const unsigned char *model_data); MaceStatus Run(const std::map &inputs, std::map *outputs, @@ -106,7 +113,6 @@ class MaceEngine::Impl { DISABLE_COPY_AND_ASSIGN(Impl); }; - MaceEngine::Impl::Impl(DeviceType device_type) : op_registry_(new OperatorRegistry()), device_type_(device_type), @@ -120,7 +126,8 @@ MaceEngine::Impl::Impl(DeviceType device_type) MaceStatus MaceEngine::Impl::Init( const NetDef *net_def, const std::vector &input_nodes, - const std::vector &output_nodes) { + const std::vector &output_nodes, + const unsigned char *model_data) { LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -141,14 +148,15 @@ MaceStatus MaceEngine::Impl::Init( int dsp_mode = ArgumentHelper::GetSingleArgument(*net_def, "dsp_mode", 0); hexagon_controller_->SetGraphMode(dsp_mode); - MACE_CHECK(hexagon_controller_->SetupGraph(*net_def), + MACE_CHECK(hexagon_controller_->SetupGraph(*net_def, model_data), "hexagon setup graph error"); if (VLOG_IS_ON(2)) { hexagon_controller_->PrintGraph(); } } else { #endif - MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_); + MaceStatus status = + ws_->LoadModelTensor(*net_def, device_type_, model_data); if (status != MaceStatus::MACE_SUCCESS) { return status; } @@ -260,8 +268,9 @@ MaceEngine::~MaceEngine() = default; MaceStatus MaceEngine::Init(const NetDef *net_def, const std::vector &input_nodes, - const std::vector &output_nodes) { - return impl_->Init(net_def, input_nodes, output_nodes); + const std::vector &output_nodes, + const unsigned char *model_data) { + return impl_->Init(net_def, input_nodes, output_nodes, model_data); } MaceStatus MaceEngine::Run(const std::map &inputs, @@ 
-275,4 +284,92 @@ MaceStatus MaceEngine::Run(const std::map &inputs,
   return impl_->Run(inputs, outputs, nullptr);
 }
 
+// Maps the first `data_size` bytes of `model_data_file` read-only and returns
+// a pointer to the mapping. The open, mmap and close results are each
+// verified with MACE_CHECK. Release the mapping with UnloadModelData, passing
+// the same `data_size`.
+const unsigned char *LoadModelData(const std::string &model_data_file,
+                                   const size_t &data_size) {
+  int fd = open(model_data_file.c_str(), O_RDONLY);
+  MACE_CHECK(fd >= 0, "Failed to open model data file ",
+             model_data_file, ", error code: ", errno);
+
+  const unsigned char *model_data = static_cast(
+      mmap(nullptr, data_size, PROT_READ, MAP_PRIVATE, fd, 0));
+  MACE_CHECK(model_data != MAP_FAILED, "Failed to map model data file ",
+             model_data_file, ", error code: ", errno);
+
+  // The mapping stays valid after the descriptor is closed.
+  int ret = close(fd);
+  MACE_CHECK(ret == 0, "Failed to close model data file ",
+             model_data_file, ", error code: ", errno);
+
+  return model_data;
+}
+
+// Unmaps `data_size` bytes starting at `model_data`, as previously returned
+// by LoadModelData.
+void UnloadModelData(const unsigned char *model_data,
+                     const size_t &data_size) {
+  int ret = munmap(const_cast(model_data),
+                   data_size);
+  MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno);
+}
+
+// Builds a MaceEngine from a serialized NetDef (`model_pb`) and the tensor
+// data stored in `model_data_file`.
+//
+// Returns MACE_INVALID_ARGS when `engine` is null or `model_pb` is empty or
+// does not parse; otherwise returns the status of MaceEngine::Init.
+MaceStatus CreateMaceEngineFromProto(
+    const std::vector &model_pb,
+    const std::string &model_data_file,
+    const std::vector &input_nodes,
+    const std::vector &output_nodes,
+    const DeviceType device_type,
+    std::shared_ptr *engine) {
+  LOG(INFO) << "Create MaceEngine from model pb";
+  // load model
+  if (engine == nullptr) {
+    return MaceStatus::MACE_INVALID_ARGS;
+  }
+
+  // Reject an empty buffer (it has no element 0 to take the address of) and
+  // a buffer that does not parse, instead of continuing with a NetDef in an
+  // unknown state.
+  std::shared_ptr net_def(new NetDef());
+  if (model_pb.empty() ||
+      !net_def->ParseFromArray(model_pb.data(),
+                               static_cast<int>(model_pb.size()))) {
+    LOG(WARNING) << "Failed to parse model pb";
+    return MaceStatus::MACE_INVALID_ARGS;
+  }
+
+  // The mapped length is the largest end offset over all tensors.
+  index_t model_data_size = 0;
+  for (auto &const_tensor : net_def->tensors()) {
+    model_data_size = std::max(
+        model_data_size,
+        static_cast(const_tensor.offset() +
+                    const_tensor.data_size() *
+                        GetEnumTypeSize(const_tensor.data_type())));
+  }
+
+  MaceStatus status;
+  const unsigned char *model_data = nullptr;
+  model_data = LoadModelData(model_data_file, model_data_size);
+
+  engine->reset(new mace::MaceEngine(device_type));
+  status = (*engine)->Init(
+      net_def.get(), input_nodes, output_nodes, model_data);
+
+  // Only GPU and HEXAGON release the mapping here; it is left mapped for
+  // other device types (the CPU branch of Workspace::LoadModelTensor wraps
+  // the pointer in a Buffer, so presumably it must outlive this call).
+  if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
+    UnloadModelData(model_data, model_data_size);
+  }
+  return status;
+}
+
 }  // namespace mace
diff --git a/mace/core/mace_types.cc b/mace/core/mace_types.cc
deleted file mode 100644
index a98b1d5e7a93efaa3b36474ae7e087ac91d6cf43..0000000000000000000000000000000000000000
--- a/mace/core/mace_types.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-// Copyright 2018 Xiaomi, Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -#include -#include - -#include "mace/public/mace_types.h" -#include "mace/utils/logging.h" - -namespace mace { - -ConstTensor::ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const DataType data_type, - uint32_t node_id) - : name_(name), - data_(data), - data_size_(std::accumulate( - dims.begin(), dims.end(), 1, std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(data_type), - node_id_(node_id) {} - -ConstTensor::ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const int data_type, - uint32_t node_id) - : name_(name), - data_(data), - data_size_(std::accumulate( - dims.begin(), dims.end(), 1, std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(static_cast(data_type)), - node_id_(node_id) {} - -const std::string &ConstTensor::name() const { return name_; } -const unsigned char *ConstTensor::data() const { return data_; } -int64_t ConstTensor::data_size() const { return data_size_; } -const std::vector &ConstTensor::dims() const { return dims_; } -DataType ConstTensor::data_type() const { return data_type_; } -uint32_t ConstTensor::node_id() const { return node_id_; } - -Argument::Argument() : has_bits_(0) {} - -void Argument::CopyFrom(const Argument &from) { - this->name_ = from.name(); - this->f_ = from.f(); - this->i_ = from.i(); - this->s_ = from.s(); - auto floats = from.floats(); - this->floats_.resize(floats.size()); - std::copy(floats.begin(), floats.end(), this->floats_.begin()); - auto ints = from.ints(); - this->ints_.resize(ints.size()); - std::copy(ints.begin(), ints.end(), this->ints_.begin()); - auto strings = from.floats(); - this->strings_.resize(strings.size()); - std::copy(floats.begin(), floats.end(), this->floats_.begin()); - - this->has_bits_ = from.has_bits_; -} -const std::string &Argument::name() const { return name_; } -void Argument::set_name(const std::string &value) { name_ = value; } -bool Argument::has_f() 
const { return (has_bits_ & 0x00000001u) != 0; } -void Argument::set_has_f() { has_bits_ |= 0x00000001u; } -float Argument::f() const { return f_; } -void Argument::set_f(float value) { - set_has_f(); - f_ = value; -} -bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; } -void Argument::set_has_i() { has_bits_ |= 0x00000002u; } -int64_t Argument::i() const { return i_; } -void Argument::set_i(int64_t value) { - set_has_i(); - i_ = value; -} -bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; } -void Argument::set_has_s() { has_bits_ |= 0x00000004u; } -std::string Argument::s() const { return s_; } -void Argument::set_s(const std::string &value) { - set_has_s(); - s_ = value; -} -const std::vector &Argument::floats() const { return floats_; } -void Argument::add_floats(float value) { floats_.push_back(value); } -void Argument::set_floats(const std::vector &value) { - floats_.resize(value.size()); - std::copy(value.begin(), value.end(), floats_.begin()); -} -const std::vector &Argument::ints() const { return ints_; } -void Argument::add_ints(int64_t value) { ints_.push_back(value); } -void Argument::set_ints(const std::vector &value) { - ints_.resize(value.size()); - std::copy(value.begin(), value.end(), ints_.begin()); -} -const std::vector &Argument::strings() const { return strings_; } -void Argument::add_strings(const ::std::string &value) { - strings_.push_back(value); -} -void Argument::set_strings(const std::vector &value) { - strings_.resize(value.size()); - std::copy(value.begin(), value.end(), strings_.begin()); -} - -// Node Input -NodeInput::NodeInput(int node_id, int output_port) - : node_id_(node_id), output_port_(output_port) {} -void NodeInput::CopyFrom(const NodeInput &from) { - node_id_ = from.node_id(); - output_port_ = from.output_port(); -} -int NodeInput::node_id() const { return node_id_; } -void NodeInput::set_node_id(int node_id) { node_id_ = node_id; } -int NodeInput::output_port() const { return 
output_port_; } -void NodeInput::set_output_port(int output_port) { output_port_ = output_port; } - -// OutputShape -OutputShape::OutputShape() {} -OutputShape::OutputShape(const std::vector &dims) - : dims_(dims.begin(), dims.end()) {} -void OutputShape::CopyFrom(const OutputShape &from) { - auto from_dims = from.dims(); - dims_.resize(from_dims.size()); - std::copy(from_dims.begin(), from_dims.end(), dims_.begin()); -} -const std::vector &OutputShape::dims() const { return dims_; } - -// Operator Def -void OperatorDef::CopyFrom(const OperatorDef &from) { - name_ = from.name(); - type_ = from.type(); - - auto from_input = from.input(); - input_.resize(from_input.size()); - std::copy(from_input.begin(), from_input.end(), input_.begin()); - auto from_output = from.output(); - output_.resize(from_output.size()); - std::copy(from_output.begin(), from_output.end(), output_.begin()); - auto from_arg = from.arg(); - arg_.resize(from_arg.size()); - for (size_t i = 0; i < from_arg.size(); ++i) { - arg_[i].CopyFrom(from_arg[i]); - } - auto from_output_shape = from.output_shape(); - output_shape_.resize(from_output_shape.size()); - for (size_t i = 0; i < from_output_shape.size(); ++i) { - output_shape_[i].CopyFrom(from_output_shape[i]); - } - auto from_data_type = from.output_type(); - output_type_.resize(from_data_type.size()); - std::copy(from_data_type.begin(), from_data_type.end(), output_type_.begin()); - - auto mem_ids = from.mem_id(); - mem_id_.resize(mem_ids.size()); - std::copy(mem_ids.begin(), mem_ids.end(), mem_id_.begin()); - - // nnlib - node_id_ = from.node_id(); - op_id_ = from.op_id(); - padding_ = from.padding(); - auto from_node_input = from.node_input(); - node_input_.resize(from_node_input.size()); - for (size_t i = 0; i < from_node_input.size(); ++i) { - node_input_[i].CopyFrom(from_node_input[i]); - } - auto from_out_max_byte_size = from.out_max_byte_size(); - out_max_byte_size_.resize(from_out_max_byte_size.size()); - 
std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(), - out_max_byte_size_.begin()); - - has_bits_ = from.has_bits_; -} - -const std::string &OperatorDef::name() const { return name_; } -void OperatorDef::set_name(const std::string &name_) { - set_has_name(); - OperatorDef::name_ = name_; -} -bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } -void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; } -const std::string &OperatorDef::type() const { return type_; } -void OperatorDef::set_type(const std::string &type_) { - set_has_type(); - OperatorDef::type_ = type_; -} -bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; } -void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; } -const std::vector &OperatorDef::mem_id() const { return mem_id_; } -void OperatorDef::set_mem_id(const std::vector &value) { - mem_id_.resize(value.size()); - std::copy(value.begin(), value.end(), mem_id_.begin()); -} -uint32_t OperatorDef::node_id() const { return node_id_; } -void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; } -uint32_t OperatorDef::op_id() const { return op_id_; } -uint32_t OperatorDef::padding() const { return padding_; } -void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; } -const std::vector &OperatorDef::node_input() const { - return node_input_; -} -void OperatorDef::add_node_input(const NodeInput &value) { - node_input_.push_back(value); -} -const std::vector &OperatorDef::out_max_byte_size() const { - return out_max_byte_size_; -} -void OperatorDef::add_out_max_byte_size(int value) { - out_max_byte_size_.push_back(value); -} -const std::vector &OperatorDef::input() const { return input_; } -const std::string &OperatorDef::input(int index) const { - MACE_CHECK(0 <= index && index <= static_cast(input_.size())); - return input_[index]; -} -std::string *OperatorDef::add_input() { - input_.push_back(""); - return &input_.back(); -} -void 
OperatorDef::add_input(const ::std::string &value) { - input_.push_back(value); -} -void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); } -void OperatorDef::set_input(const std::vector &value) { - input_.resize(value.size()); - std::copy(value.begin(), value.end(), input_.begin()); -} -const std::vector &OperatorDef::output() const { return output_; } -const std::string &OperatorDef::output(int index) const { - MACE_CHECK(0 <= index && index <= static_cast(output_.size())); - return output_[index]; -} -std::string *OperatorDef::add_output() { - output_.push_back(""); - return &output_.back(); -} -void OperatorDef::add_output(const ::std::string &value) { - output_.push_back(value); -} -void OperatorDef::add_output(::std::string &&value) { - output_.push_back(value); -} -void OperatorDef::set_output(const std::vector &value) { - output_.resize(value.size()); - std::copy(value.begin(), value.end(), output_.begin()); -} -const std::vector &OperatorDef::arg() const { return arg_; } -Argument *OperatorDef::add_arg() { - arg_.emplace_back(Argument()); - return &arg_.back(); -} -const std::vector &OperatorDef::output_shape() const { - return output_shape_; -} -void OperatorDef::add_output_shape(const OutputShape &value) { - output_shape_.push_back(value); -} -const std::vector &OperatorDef::output_type() const { - return output_type_; -} -void OperatorDef::set_output_type(const std::vector &value) { - output_type_.resize(value.size()); - std::copy(value.begin(), value.end(), output_type_.begin()); -} - -// MemoryBlock -MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) - : mem_id_(mem_id), x_(x), y_(y) {} - -int MemoryBlock::mem_id() const { return mem_id_; } -uint32_t MemoryBlock::x() const { return x_; } -uint32_t MemoryBlock::y() const { return y_; } - -// MemoryArena -const std::vector &MemoryArena::mem_block() const { - return mem_block_; -} -std::vector &MemoryArena::mutable_mem_block() { - return mem_block_; -} -int 
MemoryArena::mem_block_size() const { return mem_block_.size(); } - -// InputInfo -const std::string &InputInfo::name() const { return name_; } -int32_t InputInfo::node_id() const { return node_id_; } -int32_t InputInfo::max_byte_size() const { return max_byte_size_; } -DataType InputInfo::data_type() const { return data_type_; } -const std::vector &InputInfo::dims() const { return dims_; } - -// OutputInfo -const std::string &OutputInfo::name() const { return name_; } -int32_t OutputInfo::node_id() const { return node_id_; } -int32_t OutputInfo::max_byte_size() const { return max_byte_size_; } -DataType OutputInfo::data_type() const { return data_type_; } -void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; } -const std::vector &OutputInfo::dims() const { return dims_; } -void OutputInfo::set_dims(const std::vector &dims) { dims_ = dims; } - -// NetDef -NetDef::NetDef() : has_bits_(0) {} - -const std::string &NetDef::name() const { return name_; } -void NetDef::set_name(const std::string &value) { - set_has_name(); - name_ = value; -} -bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } -void NetDef::set_has_name() { has_bits_ |= 0x00000001u; } -const std::string &NetDef::version() const { return version_; } -void NetDef::set_version(const std::string &value) { - set_has_version(); - version_ = value; -} -bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; } -void NetDef::set_has_version() { has_bits_ |= 0x00000002u; } -const std::vector &NetDef::op() const { return op_; } -OperatorDef *NetDef::add_op() { - op_.emplace_back(OperatorDef()); - return &op_.back(); -} -std::vector &NetDef::mutable_op() { return op_; } -const std::vector &NetDef::arg() const { return arg_; } -Argument *NetDef::add_arg() { - arg_.emplace_back(Argument()); - return &arg_.back(); -} -std::vector &NetDef::mutable_arg() { return arg_; } -const std::vector &NetDef::tensors() const { return tensors_; } -std::vector 
&NetDef::mutable_tensors() { return tensors_; } -const MemoryArena &NetDef::mem_arena() const { return mem_arena_; } -MemoryArena &NetDef::mutable_mem_arena() { - set_has_mem_arena(); - return mem_arena_; -} -bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; } -void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; } -const std::vector &NetDef::input_info() const { return input_info_; } -const std::vector &NetDef::output_info() const { - return output_info_; -} -std::vector &NetDef::mutable_output_info() { return output_info_; } - -int NetDef::op_size() const { return op_.size(); } - -const OperatorDef &NetDef::op(const int idx) const { - MACE_CHECK(0 <= idx && idx < op_size()); - return op_[idx]; -} - -}; // namespace mace diff --git a/mace/core/net.cc b/mace/core/net.cc index ea5ea504b6019fc35e930d69addcd5af502c76d0..87d9db7af3ecbfd7db5b5b34657990c5aa9cf91c 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -110,7 +110,8 @@ bool SerialNet::Run(RunMetadata *run_metadata) { } OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(), - op->debug_def().output_shape(), + {op->debug_def().output_shape().begin(), + op->debug_def().output_shape().end()}, {strides, padding_type, paddings, dilations, kernels}, call_stats}; run_metadata->op_stats.emplace_back(op_stats); diff --git a/mace/core/operator.h b/mace/core/operator.h index 037aa1e0d0873a0c6de896c501f7ea8cc6f6f79d..11b1f88981a35ad1b9d0233b21b5554c3940a61f 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -25,8 +25,8 @@ #include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" -#include "mace/public/mace_types.h" namespace mace { @@ -108,20 +108,20 @@ class Operator : public OperatorBase { inputs_.push_back(tensor); } - for (size_t i = 0; i < operator_def.output().size(); ++i) { - const std::string output_str = operator_def.output()[i]; + for (int i = 
0; i < operator_def.output_size(); ++i) { + const std::string output_str = operator_def.output(i); if (ws->HasTensor(output_str)) { outputs_.push_back(ws->GetTensor(output_str)); } else { MACE_CHECK( - operator_def.output_type().size() == 0 - || operator_def.output().size() == operator_def.output_type().size(), + operator_def.output_type_size() == 0 + || operator_def.output_size() == operator_def.output_type_size(), "operator output size != operator output type size", - operator_def.output().size(), - operator_def.output_type().size()); + operator_def.output_size(), + operator_def.output_type_size()); DataType output_type; - if (i < operator_def.output_type().size()) { - output_type = operator_def.output_type()[i]; + if (i < operator_def.output_type_size()) { + output_type = operator_def.output_type(i); } else { output_type = DataTypeToEnum::v(); } diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 23de679509bc1ea72ac7fab721bf76d8105c53ec..1a070c8fb1bce26343ec2c202d782803b0e2ad64 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -27,6 +27,7 @@ #include #include +#include "mace/core/macros.h" #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" @@ -135,6 +136,7 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, << ", CPU core IDs: " << MakeString(cpu_ids); omp_set_num_threads(omp_num_threads); #else + MACE_UNUSED(omp_num_threads); LOG(WARNING) << "Set OpenMP threads number failed: OpenMP not enabled."; #endif diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index e2ba59433857b1b37aac03a9909678d2efff1ea0..7293a0aa42c5079f466a24343ed0c03ae76bd020 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -68,7 +68,8 @@ bool HexagonControlWrapper::Finalize() { return 
hexagon_controller_DeInitHexagon() == 0; } -bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { +bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, + unsigned const char *model_data) { LOG(INFO) << "Hexagon setup graph"; int64_t t0 = NowMicros(); @@ -96,7 +97,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { const_node.tensor.dataLen = 0; } else { const_node.tensor.data = - const_cast(const_tensor.data()); + const_cast(model_data + const_tensor.offset()); const_node.tensor.dataLen = const_tensor.data_size() * GetEnumTypeSize(const_tensor.data_type()); } @@ -133,12 +134,12 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { for (const OperatorDef &op : net_def.op()) { int op_id = op_map.GetOpId(op.type()); inputs.resize(op.node_input().size()); - for (size_t i = 0; i < op.node_input().size(); ++i) { + for (int i = 0; i < op.node_input().size(); ++i) { inputs[i].src_id = node_id(op.node_input()[i].node_id()); inputs[i].output_idx = op.node_input()[i].output_port(); } outputs.resize(op.out_max_byte_size().size()); - for (size_t i = 0; i < op.out_max_byte_size().size(); ++i) { + for (int i = 0; i < op.out_max_byte_size().size(); ++i) { outputs[i].max_size = op.out_max_byte_size()[i]; } cached_inputs.push_back(inputs); diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index 8e8eb53234c916b9cb6dd2250e559625bb6d34ed..cfa6661db6d0266c7067af97e0bdafd4bbc3eec8 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -31,7 +31,7 @@ class HexagonControlWrapper { bool Config(); bool Init(); bool Finalize(); - bool SetupGraph(const NetDef &net_def); + bool SetupGraph(const NetDef &net_def, const unsigned char *model_data); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); bool ExecuteGraphNew(const std::vector &input_tensors, std::vector *output_tensors); diff --git 
a/mace/core/types.h b/mace/core/types.h index c07c48297f046bb1f3624c36c523ab7c7797353b..3a7236810130db464f5b1736009f130c96fe1f48 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -18,7 +18,7 @@ #include #include -#include "mace/public/mace_types.h" +#include "mace/proto/mace.pb.h" #ifdef MACE_ENABLE_OPENCL #include "include/half.hpp" #endif diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 73c2f8ea7b6479613ece12055e31aab882080bb7..545ace635da255256d189b709c5fd9b68545b33b 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -60,24 +60,17 @@ std::vector Workspace::Tensors() const { return names; } -MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { +MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, + DeviceType type, + const unsigned char *model_data) { MACE_LATENCY_LOGGER(1, "Load model tensors"); index_t model_data_size = 0; - unsigned char *model_data_ptr = nullptr; - for (auto &const_tensor : net_def.tensors()) { - if (model_data_ptr == nullptr || - reinterpret_cast(const_tensor.data()) < - reinterpret_cast(model_data_ptr)) { - model_data_ptr = const_cast(const_tensor.data()); - } - } for (auto &const_tensor : net_def.tensors()) { model_data_size = std::max( model_data_size, - static_cast((reinterpret_cast(const_tensor.data()) - - reinterpret_cast(model_data_ptr)) + + static_cast(const_tensor.offset() + const_tensor.data_size() * - GetEnumTypeSize(const_tensor.data_type()))); + GetEnumTypeSize(const_tensor.data_type()))); } VLOG(3) << "Model data size: " << model_data_size; @@ -85,7 +78,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { if (type == DeviceType::CPU) { tensor_buffer_ = std::unique_ptr( new Buffer(GetDeviceAllocator(type), - model_data_ptr, + const_cast(model_data), model_data_size)); } else { tensor_buffer_ = std::unique_ptr( @@ -95,7 +88,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { return status; 
} tensor_buffer_->Map(nullptr); - tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); + tensor_buffer_->Copy(const_cast(model_data), + 0, model_data_size); tensor_buffer_->UnMap(); } } @@ -111,10 +105,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { dims.push_back(d); } - index_t offset = reinterpret_cast(const_tensor.data()) - - reinterpret_cast(model_data_ptr); std::unique_ptr tensor( - new Tensor(BufferSlice(tensor_buffer_.get(), offset, + new Tensor(BufferSlice(tensor_buffer_.get(), const_tensor.offset(), const_tensor.data_size() * GetEnumTypeSize(const_tensor.data_type())), const_tensor.data_type())); diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 06236fb987fed16ba42fb3a3ce885d2ccf7dbf72..7399562dd63ad86b7b5b835c3b1eb2f9cda36d71 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -47,7 +47,9 @@ class Workspace { std::vector Tensors() const; - MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type); + MaceStatus LoadModelTensor(const NetDef &net_def, + DeviceType type, + const unsigned char *model_data); ScratchBuffer *GetScratchBuffer(DeviceType device_type); diff --git a/mace/examples/BUILD b/mace/examples/BUILD index 0355575aed874fd7da507ef023a27a7de1e7429b..3f2fc38da2437948acd29b1702691a780945e52a 100644 --- a/mace/examples/BUILD +++ b/mace/examples/BUILD @@ -6,6 +6,7 @@ cc_binary( srcs = ["example.cc"], linkopts = if_openmp_enabled(["-fopenmp"]), linkstatic = 1, + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//external:gflags_nothreads", "//mace/codegen:generated_models", diff --git a/mace/examples/example.cc b/mace/examples/example.cc index d1beceb4b3f4f4c819252c519d9b83a57a9d62f7..c54f3c504af23413488a648a0f08534bdf3f32d4 100644 --- a/mace/examples/example.cc +++ b/mace/examples/example.cc @@ -120,6 +120,9 @@ DEFINE_string(output_file, DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); 
+DEFINE_string(model_file, + "", + "model file name, used when load mace model in pb"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); @@ -163,23 +166,31 @@ bool RunModel(const std::vector &input_names, std::shared_ptr engine; MaceStatus create_engine_status; // Create Engine - if (FLAGS_model_data_file.empty()) { + MaceStatus create_engine_status; + // Create Engine + int64_t t0 = NowMicros(); + if (FLAGS_model_file != "") { + std::vector model_pb_data; + if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) { + LOG(FATAL) << "Failed to read file: " << FLAGS_model_file; + } create_engine_status = - CreateMaceEngine(FLAGS_model_name.c_str(), - nullptr, - input_names, - output_names, - device_type, - &engine); + CreateMaceEngineFromProto(model_pb_data, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); } else { create_engine_status = - CreateMaceEngine(FLAGS_model_name.c_str(), - FLAGS_model_data_file.c_str(), - input_names, - output_names, - device_type, - &engine); + CreateMaceEngineFromCode(model_name, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); } + if (create_engine_status != MaceStatus::MACE_SUCCESS) { LOG(FATAL) << "Create engine error, please check the arguments"; } @@ -258,6 +269,7 @@ int Main(int argc, char **argv) { LOG(INFO) << "input_file: " << FLAGS_input_file; LOG(INFO) << "output_file: " << FLAGS_output_file; LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; + LOG(INFO) << "model_file: " << FLAGS_model_file; LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "round: " << FLAGS_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round; diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index c7f144b02ddb395097e73ae5436348872b65c032..3e837d85fd176975150ec32df48a607ad8d0075a 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -38,7 +38,8 @@ cc_library( "opencl/*.h", 
"buffer_to_image.h", ])), - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + @@ -61,7 +62,8 @@ cc_test( "opencl/*_test.cc", ], ), - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + diff --git a/mace/ops/BUILD b/mace/ops/BUILD index a03b9d178fa8c7543003f70d60ee236fc0eb663e..c925ff134ca63772c7a549055900767bd9750878 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -15,6 +15,7 @@ cc_library( hdrs = [ "ops_test_util.h", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/core", "@gtest//:gtest", @@ -40,7 +41,8 @@ cc_library( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + @@ -58,7 +60,8 @@ cc_test( srcs = glob( ["*_test.cc"], ), - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + @@ -77,7 +80,8 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + 
if_android_armv7(["-mfloat-abi=softfp"]) + diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 0f46e7a0d5c683564ccae806ad7d218c8592bc93..733673a7273dae6cc19f308e8561ef5c5fd16201 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -53,7 +53,9 @@ class OpDefBuilder { } OpDefBuilder &OutputType(const std::vector &output_type) { - op_def_.set_output_type(output_type); + for (auto out_t : output_type) { + op_def_.add_output_type(out_t); + } return *this; } diff --git a/mace/proto/BUILD b/mace/proto/BUILD index 5222b06bda6e1681b15ac7f60317376c5d34fa3d..c3004a4fd240416a7af4f2e13f8d8e64c115052d 100644 --- a/mace/proto/BUILD +++ b/mace/proto/BUILD @@ -8,7 +8,9 @@ package( licenses(["notice"]) # Apache 2.0 -load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") +load("@com_google_protobuf//:protobuf.bzl", + "py_proto_library", + "cc_proto_library") py_proto_library( name = "mace_py", @@ -19,11 +21,9 @@ py_proto_library( deps = ["@com_google_protobuf//:protobuf_python"], ) -py_proto_library( - name = "caffe_py", - srcs = ["caffe.proto"], - default_runtime = "@com_google_protobuf//:protobuf_python", +cc_proto_library( + name = "mace_cc", + srcs = ["mace.proto"], + default_runtime = "@com_google_protobuf//:protobuf_lite", protoc = "@com_google_protobuf//:protoc", - srcs_version = "PY2AND3", - deps = ["@com_google_protobuf//:protobuf_python"], ) diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index a54fa45c1d89f4b005cdd7671a0e8c62ee0157b2..393067dd6cc7d8e6bcc90d993fd8784b277781f9 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -2,17 +2,13 @@ syntax = "proto2"; package mace; +option optimize_for = LITE_RUNTIME; + enum NetMode { INIT = 0; NORMAL = 1; } -enum DeviceType { - CPU = 0; // In default, we will use CPU. - GPU = 2; - HEXAGON = 3; -} - enum DataType { DT_INVALID = 0; @@ -32,7 +28,7 @@ enum DataType { DT_UINT32 = 22; } -message TensorProto { +message ConstTensor { // The dimensions in the tensor. 
repeated int64 dims = 1; optional DataType data_type = 2 [default = DT_FLOAT]; @@ -52,6 +48,8 @@ message TensorProto { repeated int64 int64_data = 10 [packed = true]; // Optionally, a name for the tensor. optional string name = 7; + optional int64 offset = 11; + optional int64 data_size = 12; optional uint32 node_id = 100; } @@ -126,7 +124,7 @@ message NetDef { repeated OperatorDef op = 2; optional string version = 3; repeated Argument arg = 4; - repeated TensorProto tensors = 5; + repeated ConstTensor tensors = 5; // for mem optimization optional MemoryArena mem_arena = 10; diff --git a/mace/public/BUILD b/mace/public/BUILD index 41c709878728c120daf7f2c82954f04b4a7e1809..3669d59518f3b89484626d1f023195f58395b924 100644 --- a/mace/public/BUILD +++ b/mace/public/BUILD @@ -12,6 +12,6 @@ cc_library( hdrs = [ "mace.h", "mace_runtime.h", - "mace_types.h", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], ) diff --git a/mace/public/mace.h b/mace/public/mace.h index 1bc8c36a3fc0eced343ac66f26ff5eee49ab82dd..2ebf72a0cf9ffe1147d710600808ed5458e22bd2 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -26,10 +26,39 @@ namespace mace { -const char *MaceVersion(); +class OutputShape; +class NetDef; enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; +struct CallStats { + int64_t start_micros; + int64_t end_micros; +}; + +struct ConvPoolArgs { + std::vector strides; + int padding_type; + std::vector paddings; + std::vector dilations; + std::vector kernels; +}; + +struct OperatorStats { + std::string operator_name; + std::string type; + std::vector output_shape; + ConvPoolArgs args; + CallStats stats; +}; + +class RunMetadata { + public: + std::vector op_stats; +}; + +const char *MaceVersion(); + enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1, @@ -60,9 +89,6 @@ class MaceTensor { std::unique_ptr impl_; }; -class NetDef; -class RunMetadata; - class MaceEngine { public: explicit MaceEngine(DeviceType device_type); @@ -70,7 +96,8 @@ class 
MaceEngine { MaceStatus Init(const NetDef *net_def, const std::vector &input_nodes, - const std::vector &output_nodes); + const std::vector &output_nodes, + const unsigned char *model_data); MaceStatus Run(const std::map &inputs, std::map *outputs); @@ -87,6 +114,14 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine &) = delete; }; +MaceStatus CreateMaceEngineFromProto( + const std::vector &model_pb, + const std::string &model_data_file, + const std::vector &input_nodes, + const std::vector &output_nodes, + const DeviceType device_type, + std::shared_ptr *engine); + } // namespace mace #endif // MACE_PUBLIC_MACE_H_ diff --git a/mace/public/mace_types.h b/mace/public/mace_types.h deleted file mode 100644 index 48da1dd3bc6f9b063a09465d97ec461676882b0e..0000000000000000000000000000000000000000 --- a/mace/public/mace_types.h +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file defines data types used by net creation and benchmark tools. -// These APIs are not stable and should only be used by advanced users. - -#ifndef MACE_PUBLIC_MACE_TYPES_H_ -#define MACE_PUBLIC_MACE_TYPES_H_ - -#include -#include - -namespace mace { - -// Disable the copy and assignment operator for a class. 
-#ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ - private: \ - classname(const classname &) = delete; \ - classname &operator=(const classname &) = delete -#endif - -enum NetMode { INIT = 0, NORMAL = 1 }; - -enum DataType { - DT_INVALID = 0, - DT_FLOAT = 1, - DT_DOUBLE = 2, - DT_INT32 = 3, - DT_UINT8 = 4, - DT_INT16 = 5, - DT_INT8 = 6, - DT_STRING = 7, - DT_INT64 = 8, - DT_UINT16 = 9, - DT_BOOL = 10, - DT_HALF = 19, - DT_UINT32 = 22 -}; - -class ConstTensor { - public: - ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const DataType data_type = DT_FLOAT, - uint32_t node_id = 0); - ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const int data_type, - uint32_t node_id = 0); - - const std::string &name() const; - const unsigned char *data() const; - int64_t data_size() const; - const std::vector &dims() const; - DataType data_type() const; - uint32_t node_id() const; - - private: - const std::string name_; - const unsigned char *data_; - const int64_t data_size_; - const std::vector dims_; - const DataType data_type_; - const uint32_t node_id_; -}; - -class Argument { - public: - Argument(); - void CopyFrom(const Argument &from); - - public: - const std::string &name() const; - void set_name(const std::string &value); - bool has_f() const; - float f() const; - void set_f(float value); - bool has_i() const; - int64_t i() const; - void set_i(int64_t value); - bool has_s() const; - std::string s() const; - void set_s(const std::string &value); - const std::vector &floats() const; - void add_floats(float value); - void set_floats(const std::vector &value); - const std::vector &ints() const; - void add_ints(int64_t value); - void set_ints(const std::vector &value); - const std::vector &strings() const; - void add_strings(const ::std::string &value); - void set_strings(const std::vector &value); - - private: - void set_has_f(); - void set_has_i(); - 
void set_has_s(); - - private: - std::string name_; - float f_; - int64_t i_; - std::string s_; - std::vector floats_; - std::vector ints_; - std::vector strings_; - uint32_t has_bits_; -}; - -class NodeInput { - public: - NodeInput() {} - NodeInput(int node_id, int output_port); - void CopyFrom(const NodeInput &from); - - public: - int node_id() const; - void set_node_id(int node_id); - int output_port() const; - void set_output_port(int output_port); - - private: - int node_id_; - int output_port_; -}; - -class OutputShape { - public: - OutputShape(); - OutputShape(const std::vector &dims); // NOLINT(runtime/explicit) - void CopyFrom(const OutputShape &from); - - public: - const std::vector &dims() const; - - private: - std::vector dims_; -}; - -class OperatorDef { - public: - void CopyFrom(const OperatorDef &from); - - public: - const std::string &name() const; - void set_name(const std::string &name_); - bool has_name() const; - const std::string &type() const; - void set_type(const std::string &type_); - bool has_type() const; - const std::vector &mem_id() const; - void set_mem_id(const std::vector &value); - uint32_t node_id() const; - void set_node_id(uint32_t node_id); - uint32_t op_id() const; - uint32_t padding() const; - void set_padding(uint32_t padding); - const std::vector &node_input() const; - void add_node_input(const NodeInput &value); - const std::vector &out_max_byte_size() const; - void add_out_max_byte_size(int value); - const std::vector &input() const; - const std::string &input(int index) const; - std::string *add_input(); - void add_input(const ::std::string &value); - void add_input(::std::string &&value); - void set_input(const std::vector &value); - const std::vector &output() const; - const std::string &output(int index) const; - std::string *add_output(); - void add_output(const ::std::string &value); - void add_output(::std::string &&value); - void set_output(const std::vector &value); - const std::vector &arg() const; - Argument 
*add_arg(); - const std::vector &output_shape() const; - void add_output_shape(const OutputShape &value); - const std::vector &output_type() const; - void set_output_type(const std::vector &value); - - private: - void set_has_name(); - void set_has_type(); - void set_has_mem_id(); - - private: - std::string name_; - std::string type_; - - std::vector input_; - std::vector output_; - std::vector arg_; - std::vector output_shape_; - std::vector output_type_; - - std::vector mem_id_; - - // nnlib - uint32_t node_id_; - uint32_t op_id_; - uint32_t padding_; - std::vector node_input_; - std::vector out_max_byte_size_; - - uint32_t has_bits_; -}; - -class MemoryBlock { - public: - MemoryBlock(int mem_id, uint32_t x, uint32_t y); - - public: - int mem_id() const; - uint32_t x() const; - uint32_t y() const; - - private: - int mem_id_; - uint32_t x_; - uint32_t y_; -}; - -class MemoryArena { - public: - const std::vector &mem_block() const; - std::vector &mutable_mem_block(); - int mem_block_size() const; - - private: - std::vector mem_block_; -}; - -// for hexagon mace-nnlib -class InputInfo { - public: - const std::string &name() const; - int32_t node_id() const; - int32_t max_byte_size() const; - DataType data_type() const; - const std::vector &dims() const; - - private: - std::string name_; - int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len - DataType data_type_; - std::vector dims_; -}; - -class OutputInfo { - public: - const std::string &name() const; - int32_t node_id() const; - int32_t max_byte_size() const; - DataType data_type() const; - void set_data_type(DataType data_type); - const std::vector &dims() const; - void set_dims(const std::vector &dims); - - private: - std::string name_; - int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len - DataType data_type_; - std::vector dims_; -}; - -class NetDef { - public: - NetDef(); - int op_size() const; - - const OperatorDef &op(const int idx) const; - - public: - const 
std::string &name() const; - bool has_name() const; - void set_name(const std::string &value); - const std::string &version() const; - bool has_version() const; - void set_version(const std::string &value); - - const std::vector &op() const; - OperatorDef *add_op(); - std::vector &mutable_op(); - const std::vector &arg() const; - Argument *add_arg(); - std::vector &mutable_arg(); - const std::vector &tensors() const; - std::vector &mutable_tensors(); - const MemoryArena &mem_arena() const; - bool has_mem_arena() const; - MemoryArena &mutable_mem_arena(); - const std::vector &input_info() const; - const std::vector &output_info() const; - std::vector &mutable_output_info(); - - private: - void set_has_name(); - void set_has_version(); - void set_has_mem_arena(); - - private: - std::string name_; - std::string version_; - std::vector op_; - std::vector arg_; - std::vector tensors_; - - // for mem optimization - MemoryArena mem_arena_; - - // for hexagon mace-nnlib - std::vector input_info_; - std::vector output_info_; - - uint32_t has_bits_; -}; - -struct CallStats { - int64_t start_micros; - int64_t end_micros; -}; - -struct ConvPoolArgs { - std::vector strides; - int padding_type; - std::vector paddings; - std::vector dilations; - std::vector kernels; -}; - -struct OperatorStats { - std::string operator_name; - std::string type; - std::vector output_shape; - ConvPoolArgs args; - CallStats stats; -}; - -class RunMetadata { - public: - std::vector op_stats; -}; - -} // namespace mace - -#endif // MACE_PUBLIC_MACE_TYPES_H_ diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index e903d817033b05287d82d7fec4ec2d8868c1dfbb..b466ac729d9944cc6e19d88147dea8a36257e0b1 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -3,6 +3,7 @@ py_library( srcs = [ "convert_util.py", "graph_util.py", + "tensor_util.py", "tf_dsp_converter_lib.py", "converter_tool/base_converter.py", "converter_tool/shape_inference.py", diff --git 
a/mace/python/tools/converter.py b/mace/python/tools/converter.py index a38bd8cc6bc109333c3c56c12e9ba4d4eca69d13..164b54e45595891752a6302a8883b9271e2bc7aa 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -22,6 +22,7 @@ from mace.proto import mace_pb2 from mace.python.tools import tf_dsp_converter_lib from mace.python.tools import memory_optimizer from mace.python.tools import source_converter_lib +from mace.python.tools import tensor_util from mace.python.tools.converter_tool import base_converter as cvt from mace.python.tools.converter_tool import tensorflow_converter from mace.python.tools.converter_tool import caffe_converter @@ -36,9 +37,9 @@ from mace.python.tools.convert_util import mace_check FLAGS = None -device_type_map = {'cpu': mace_pb2.CPU, - 'gpu': mace_pb2.GPU, - 'dsp': mace_pb2.HEXAGON} +device_type_map = {'cpu': cvt.DeviceType.CPU.value, + 'gpu': cvt.DeviceType.GPU.value, + 'dsp': cvt.DeviceType.HEXAGON.value} def file_checksum(fname): @@ -129,16 +130,16 @@ def main(unused_args): else: gpu_data_type = mace_pb2.DT_FLOAT device_data_type_map = { - mace_pb2.CPU: mace_pb2.DT_FLOAT, - mace_pb2.GPU: gpu_data_type, - mace_pb2.HEXAGON: mace_pb2.DT_UINT8 + cvt.DeviceType.CPU.value: mace_pb2.DT_FLOAT, + cvt.DeviceType.GPU.value: gpu_data_type, + cvt.DeviceType.HEXAGON.value: mace_pb2.DT_UINT8 } print("Transform model to one that can better run on device") if not FLAGS.runtime: cpu_graph_def = copy.deepcopy(output_graph_def) - option.device = mace_pb2.CPU - option.data_type = device_data_type_map[mace_pb2.CPU] + option.device = cvt.DeviceType.CPU.value + option.data_type = device_data_type_map[cvt.DeviceType.CPU.value] option.disable_transpose_filters() mace_cpu_transformer = transformer.Transformer( option, cpu_graph_def) @@ -147,8 +148,8 @@ def main(unused_args): memory_optimizer.optimize_cpu_memory(cpu_graph_def) print "CPU memory optimization done." 
- option.device = mace_pb2.GPU - option.data_type = device_data_type_map[mace_pb2.GPU] + option.device = cvt.DeviceType.GPU.value + option.data_type = device_data_type_map[cvt.DeviceType.GPU.value] option.enable_transpose_filters() mace_gpu_transformer = transformer.Transformer( option, output_graph_def) @@ -179,18 +180,35 @@ def main(unused_args): print "Memory optimization done." - if FLAGS.output_type == 'source': - source_converter_lib.convert_to_source( - output_graph_def, model_checksum, weight_checksum, FLAGS.template, - FLAGS.obfuscate, FLAGS.model_tag, FLAGS.output, FLAGS.runtime, - FLAGS.embed_model_data, FLAGS.winograd, - FLAGS.gpu_data_type) + if FLAGS.obfuscate: + tensor_util.obfuscate_name(output_graph_def) else: - with open(FLAGS.output, "wb") as f: + tensor_util.rename_tensor(output_graph_def) + + tensor_infos, model_data = tensor_util.get_tensor_info_and_model_data( + output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) + + source_converter_lib.convert_to_source( + output_graph_def, model_checksum, weight_checksum, FLAGS.template, + FLAGS.obfuscate, FLAGS.model_tag, FLAGS.codegen_output, + FLAGS.runtime, FLAGS.embed_model_data, FLAGS.winograd, + FLAGS.model_load_type, tensor_infos, model_data) + + if not FLAGS.embed_model_data: + output_dir = os.path.dirname(FLAGS.codegen_output) + '/' + with open(output_dir + FLAGS.model_tag + '.data', "wb") as f: + f.write(bytearray(model_data)) + + if FLAGS.model_load_type == 'pb': + tensor_util.del_tensor_data( + output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) + tensor_util.update_tensor_data_type( + output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type) + with open(FLAGS.pb_output, "wb") as f: f.write(output_graph_def.SerializeToString()) - with open(FLAGS.output + '_txt', "wb") as f: - # output_graph_def.ClearField('tensors') - f.write(str(output_graph_def)) + # with open(FLAGS.pb_output + '_txt', "wb") as f: + # # output_graph_def.ClearField('tensors') + # f.write(str(output_graph_def)) print("Model 
conversion is completed.") @@ -226,10 +244,15 @@ def parse_args(): default="", help="Weight file sha256 checksum") parser.add_argument( - "--output", + "--codegen_output", type=str, default="", help="File to save the output graph to.") + parser.add_argument( + "--pb_output", + type=str, + default="", + help="File to save the mace model to.") parser.add_argument( "--runtime", type=str, default="", help="Runtime: cpu/gpu/dsp") parser.add_argument( @@ -239,8 +262,6 @@ def parse_args(): help="e.g., input_node") parser.add_argument( "--output_node", type=str, default="softmax", help="e.g., softmax") - parser.add_argument( - "--output_type", type=str, default="pb", help="output type: source/pb") parser.add_argument( "--template", type=str, default="", help="template path") parser.add_argument( @@ -273,6 +294,12 @@ def parse_args(): type=str2bool, default=True, help="embed model data.") + parser.add_argument( + "--model_load_type", + type=str, + default="source", + help="[source|pb] Load models in generated `source` code" + + "or `pb` file.") parser.add_argument( "--gpu_data_type", type=str, default="half", help="half/float") return parser.parse_known_args() diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 1601ff53538338c91f0f51873aa71a9dce463b4f..be6e67529166d6678df326af14c369124fff7e08 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -18,6 +18,12 @@ from enum import Enum from mace.proto import mace_pb2 +class DeviceType(Enum): + CPU = 0 + GPU = 2 + HEXAGON = 3 + + class DataFormat(Enum): NHWC = 0 NCHW = 1 @@ -199,7 +205,7 @@ class ConverterOption(object): self._input_nodes = {} self._output_nodes = {} self._data_type = mace_pb2.DT_FLOAT - self._device = mace_pb2.CPU + self._device = DeviceType.CPU.value self._winograd_enabled = False self._transformer_option = [ TransformerRule.REMOVE_USELESS_RESHAPE_OP, diff --git 
a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index d59ce2c1fd1e8293e68c677759883a9c76098506..0fa5ddd967f026757e886a39f2e84f5a63c975bf 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -18,14 +18,15 @@ import numpy as np from mace.proto import mace_pb2 from mace.python.tools.converter_tool import base_converter -from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import ActivationType -from mace.python.tools.converter_tool.base_converter import PaddingMode +from mace.python.tools.converter_tool.base_converter import ConverterUtil from mace.python.tools.converter_tool.base_converter import DataFormat +from mace.python.tools.converter_tool.base_converter import DeviceType +from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import FilterFormat -from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceKeyword -from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import MaceOp +from mace.python.tools.converter_tool.base_converter import PaddingMode from mace.python.tools.converter_tool.base_converter import TransformerRule from mace.python.tools.convert_util import mace_check @@ -117,7 +118,7 @@ class Transformer(base_converter.ConverterInterface): self._producer = {} self._target_data_format = DataFormat.NHWC - if self._option.device == mace_pb2.CPU: + if self._option.device == DeviceType.CPU.value: self._target_data_format = DataFormat.NCHW def run(self): @@ -491,7 +492,7 @@ class Transformer(base_converter.ConverterInterface): net = self._model filter_format = self.filter_format() - if self._option.device == mace_pb2.GPU: + if self._option.device == 
DeviceType.GPU.value: for op in net.op: if op.type == MaceOp.Conv2D.name \ and self.check_if_gpu_use_winograd_conv(op): @@ -619,7 +620,7 @@ class Transformer(base_converter.ConverterInterface): return False def flatten_atrous_conv(self): - if self._option.device != mace_pb2.GPU: + if self._option.device != DeviceType.GPU.value: return net = self._model @@ -871,7 +872,7 @@ class Transformer(base_converter.ConverterInterface): op.input[input_idx] = output_name def transform_buffer_image(self): - if self._option.device != mace_pb2.GPU: + if self._option.device != DeviceType.GPU.value: return False print("Transform buffer to image") @@ -997,7 +998,7 @@ class Transformer(base_converter.ConverterInterface): def transform_global_conv_to_fc(self): """Transform global conv to fc should be placed after transposing input/output and filter""" - if self._option.device == mace_pb2.GPU: + if self._option.device == DeviceType.GPU.value: return False net = self._model diff --git a/mace/python/tools/mace_engine_factory.h.jinja2 b/mace/python/tools/mace_engine_factory.h.jinja2 index 3110ab37b3e4fa6f3a47825c604a24e4ae2101d3..b1c7879af66ab8576df863571766276841181cd8 100644 --- a/mace/python/tools/mace_engine_factory.h.jinja2 +++ b/mace/python/tools/mace_engine_factory.h.jinja2 @@ -19,18 +19,21 @@ #include #include +#include "mace/core/macros.h" #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" namespace mace { + +{% if model_type == 'source' %} {% for tag in model_tags %} namespace {{tag}} { -extern const unsigned char *LoadModelData(const char *model_data_file); +extern const unsigned char *LoadModelData(const std::string &model_data_file); extern void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(const unsigned char *model_data); +extern const std::shared_ptr CreateNet(); extern const std::string ModelName(); extern const std::string ModelChecksum(); @@ -48,9 +51,9 @@ std::map model_name_map { }; } // namespace -MaceStatus 
CreateMaceEngine( - const char *model_name, - const char *model_data_file, +MaceStatus CreateMaceEngineFromCode( + const std::string &model_name, + const std::string &model_data_file, const std::vector &input_nodes, const std::vector &output_nodes, const DeviceType device_type, @@ -60,16 +63,16 @@ MaceStatus CreateMaceEngine( return MaceStatus::MACE_INVALID_ARGS; } const unsigned char * model_data = nullptr; - NetDef net_def; + std::shared_ptr net_def; MaceStatus status = MaceStatus::MACE_SUCCESS; switch (model_name_map[model_name]) { {% for i in range(model_tags |length) %} case {{ i }}: model_data = mace::{{model_tags[i]}}::LoadModelData(model_data_file); - net_def = mace::{{model_tags[i]}}::CreateNet(model_data); + net_def = mace::{{model_tags[i]}}::CreateNet(); engine->reset(new mace::MaceEngine(device_type)); - status = (*engine)->Init(&net_def, input_nodes, output_nodes); + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, model_data); if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { mace::{{model_tags[i]}}::UnloadModelData(model_data); } @@ -81,5 +84,22 @@ MaceStatus CreateMaceEngine( return status; } +{% else %} +MaceStatus CreateMaceEngineFromCode( + const std::string &model_name, + const std::string &model_data_file, + const std::vector &input_nodes, + const std::vector &output_nodes, + const DeviceType device_type, + std::shared_ptr *engine) { + MACE_UNUSED(model_name); + MACE_UNUSED(model_data_file); + MACE_UNUSED(input_nodes); + MACE_UNUSED(output_nodes); + MACE_UNUSED(device_type); + MACE_UNUSED(engine); + return MaceStatus::MACE_INVALID_ARGS; +} +{% endif %} } // namespace mace diff --git a/mace/python/tools/mace_engine_factory_codegen.py b/mace/python/tools/mace_engine_factory_codegen.py index e74c0952e0583a5b9eab2c4404b339e5693d20c6..c744069a5db3209006b89dd88403effa24d6182f 100644 --- a/mace/python/tools/mace_engine_factory_codegen.py +++ b/mace/python/tools/mace_engine_factory_codegen.py @@ -20,7 +20,7 
@@ from jinja2 import Environment, FileSystemLoader FLAGS = None -def gen_mace_engine_factory(model_tags, template_dir, output_dir): +def gen_mace_engine_factory(model_tags, template_dir, model_type, output_dir): # Create the jinja2 environment. j2_env = Environment( loader=FileSystemLoader(template_dir), trim_blocks=True) @@ -29,6 +29,7 @@ def gen_mace_engine_factory(model_tags, template_dir, output_dir): template_name = 'mace_engine_factory.h.jinja2' source = j2_env.get_template(template_name).render( model_tags=model_tags, + model_type=model_type, ) with open(output_dir + '/mace_engine_factory.h', "wb") as f: f.write(source) @@ -45,11 +46,16 @@ def parse_args(): parser.add_argument( "--template_dir", type=str, default="", help="template path") parser.add_argument( - "--output_dir", type=str, default="", help="template path") + "--output_dir", type=str, default="", help="output path") + parser.add_argument( + "--model_type", + type=str, + default="", + help="[source|pb] model load type") return parser.parse_known_args() if __name__ == '__main__': FLAGS, unparsed = parse_args() gen_mace_engine_creator(FLAGS.model_tag, FLAGS.template_dir, - FLAGS.output_dir) + FLAGS.model_type, FLAGS.output_dir) diff --git a/mace/python/tools/model.jinja2 b/mace/python/tools/model.jinja2 index bd228229b5e339b22172ddda650af2340b169a28..4aaf6e47b5e656668456aaf837cdc3ee2f7d1164 100644 --- a/mace/python/tools/model.jinja2 +++ b/mace/python/tools/model.jinja2 @@ -17,6 +17,8 @@ #include #include +#include "mace/core/macros.h" +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" @@ -24,9 +26,8 @@ namespace mace { namespace {{tag}} { -{% for tensor in tensors %} -extern void CreateTensor{{ tensor.id }}(std::vector *tensors, - const unsigned char *model_data); +{% for i in range(net.tensors|length) %} +extern void CreateTensor{{ i }}(mace::ConstTensor *tensor); {% endfor %} @@ -39,116 +40,119 @@ extern void 
CreateOperator{{i}}(mace::OperatorDef *op); namespace { {% if net.arg|length != 0 %} -void CreateNetArg(mace::NetDef &net_def) { - net_def.mutable_arg().reserve({{ net.arg|length }}); +void CreateNetArg(NetDef *net_def) { + net_def->mutable_arg()->Reserve({{ net.arg|length }}); mace::Argument *arg = nullptr; - {% for arg in net.arg %} + {% for i in range(net.arg|length) %} - arg = net_def.add_arg(); - arg->set_name({{ arg.name|tojson }}); + arg = net_def->add_arg(); + arg->set_name({{ net.arg[i].name|tojson }}); - {%- if arg.HasField('f') %} - arg->set_f({{ arg.f }}); + {%- if net.arg[i].HasField('f') %} + arg->set_f({{ net.arg[i].f }}); {% endif %} - {%- if arg.HasField('i') %} - arg->set_i({{ arg.i }}); + {%- if net.arg[i].HasField('i') %} + arg->set_i({{ net.arg[i].i }}); {% endif %} - {%- if arg.HasField('s') %} - arg->set_s({{ arg.s|tojson }}); + {%- if net.arg[i].HasField('s') %} + arg->set_s({{ net.arg[i].s|tojson }}); {% endif %} - {% if arg.floats|length != 0 %} - arg->set_floats({ {{ arg.floats|join(', ') }} }); - {% endif %} - {% if arg.ints|length != 0 %} - arg->set_ints({ {{ arg.ints|join(', ') }} }); - {% endif %} - {% if arg.strings|length != 0 %} - arg->set_strings({ {{ arg.strings|stringfy() }} }); - {% endif %} + arg->mutable_floats()->Reserve({{ net.arg[i].floats|length }}); + {% for float_value in net.arg[i].floats %} + arg->add_floats({{ float_value }}); + {% endfor %} + arg->mutable_ints()->Reserve({{ net.arg[i].ints|length }}); + {% for int_value in net.arg[i].ints %} + arg->add_ints({{ int_value }}); + {% endfor %} + arg->mutable_strings()->Reserve({{ net.arg[i].strings|length }}); + {% for str_value in net.arg[i].strings %} + arg->add_strings({{ str_value }}); + {% endfor %} {% endfor %} } {% endif %} {% if net.output_info | length > 0 %} -void CreateOutputInfo(mace::NetDef &net_def) { - std::vector> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} }; +void CreateOutputInfo(NetDef 
*net_def) { + std::vector> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} }; std::vector data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} }; std::vector data_types({{ net.output_info | length }}); for (int k = 0; k < {{ net.output_info | length }}; ++k) { data_types[k] = static_cast(data_types_int[k]); } - net_def.mutable_output_info().resize({{ net.output_info | length }}); + net_def->mutable_output_info()->Reserve({{ net.output_info | length }}); for (int i = 0; i < {{ net.output_info | length }}; ++i) { - net_def.mutable_output_info()[i].set_data_type(data_types[i]); - net_def.mutable_output_info()[i].set_dims(dims[i]); + auto output_info = net_def->add_output_info(); + output_info->set_data_type(data_types[i]); + output_info->mutable_dims()->Reserve(dims[i].size()); + for (size_t j = 0; j < dims[i].size(); ++j) { + output_info->add_dims(dims[i][j]); + } } } {% endif %} -void CreateOperators(std::vector *ops) { +void CreateOperators(NetDef *net_def) { MACE_LATENCY_LOGGER(1, "Create operators"); - ops->resize({{ net.op|length }}); - + net_def->mutable_op()->Reserve({{ net.op|length }}); {% for i in range(net.op|length) %} - mace::{{tag}}::CreateOperator{{i}}(&ops->at({{i}})); + mace::{{tag}}::CreateOperator{{i}}(net_def->add_op()); {% endfor %} } -void CreateTensors(std::vector *tensors, - const unsigned char *model_data) { +void CreateTensors(NetDef *net_def) { MACE_LATENCY_LOGGER(1, "Create tensors"); - tensors->reserve({{ net.tensors|length }}); - {% for tensor in tensors %} - mace::{{tag}}::CreateTensor{{tensor.id}}(tensors, model_data); + net_def->mutable_tensors()->Reserve({{ net.tensors|length }}); + {% for i in range(net.tensors|length) %} + mace::{{tag}}::CreateTensor{{ i }}(net_def->add_tensors()); {% endfor %} } - {% if net.mem_arena.mem_block|length != 0 %} void CreateMemoryArena(mace::MemoryArena *mem_arena) { - std::vector &mem_block = 
mem_arena->mutable_mem_block(); - mem_block.reserve({{ net.mem_arena.mem_block|length }}); + mem_arena->mutable_mem_block()->Reserve({{ net.mem_arena.mem_block|length }}); + {% for i in range(net.mem_arena.mem_block|length) %} - {% for mem_blk in net.mem_arena.mem_block %} - mem_block.emplace_back(mace::MemoryBlock({{ mem_blk.mem_id }}, - {{mem_blk.x}}, - {{mem_blk.y}})); - {% endfor %} + mace::MemoryBlock* mem_block{{i}} = mem_arena->add_mem_block(); + mem_block{{i}}->set_mem_id({{net.mem_arena.mem_block[i].mem_id}}); + mem_block{{i}}->set_x({{net.mem_arena.mem_block[i].x}}); + mem_block{{i}}->set_y({{net.mem_arena.mem_block[i].y}}); + {% endfor %} } {% endif %} + } // namespace namespace {{tag}} { -NetDef CreateNet(const unsigned char *model_data) { +const std::shared_ptr CreateNet() { MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}"); - NetDef net_def; - net_def.set_name("{{ net.name}}"); - net_def.set_version("{{ net.version }}"); - - {% if net.arg|length != 0 %} - CreateNetArg(net_def); - {% endif %} - CreateOperators(&net_def.mutable_op()); + std::shared_ptr net_def(new NetDef()); + net_def->set_name("{{ net.name}}"); + net_def->set_version("{{ net.version }}"); - CreateTensors(&net_def.mutable_tensors(), model_data); + CreateOperators(net_def.get()); + CreateTensors(net_def.get()); + {% if net.arg|length != 0 %} + CreateNetArg(net_def.get()); + {% endif %} {% if net.mem_arena.mem_block|length != 0 %} - CreateMemoryArena(&net_def.mutable_mem_arena()); + CreateMemoryArena(net_def->mutable_mem_arena()); {% endif %} - {% if net.output_info | length > 0 %} - CreateOutputInfo(net_def); + CreateOutputInfo(net_def.get()); {% endif %} return net_def; diff --git a/mace/python/tools/model_header.jinja2 b/mace/python/tools/model_header.jinja2 index ace89933f014658a2777a4723921ecb504562a16..28b1b54563943509e3b4d2ec2ef0c8a90bc87e9c 100644 --- a/mace/python/tools/model_header.jinja2 +++ b/mace/python/tools/model_header.jinja2 @@ -24,11 +24,12 @@ namespace mace { 
namespace {{tag}} { -const unsigned char *LoadModelData(const char *model_data_file); + +const unsigned char *LoadModelData(const std::string &model_data_file); void UnloadModelData(const unsigned char *model_data); -NetDef CreateNet(const unsigned char *model_data); +const std::shared_ptr CreateNet(); const std::string ModelName(); diff --git a/mace/python/tools/operator.jinja2 b/mace/python/tools/operator.jinja2 index fa9610411c6c861a95abded984c3257a495b7393..bd7e36eb870276a9b37b8c249ed074c272d2a7f2 100644 --- a/mace/python/tools/operator.jinja2 +++ b/mace/python/tools/operator.jinja2 @@ -17,6 +17,7 @@ #include #include +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" @@ -34,11 +35,24 @@ void UpdateOp(mace::OperatorDef *op, const std::vector &mem_ids) { op->set_name(name); op->set_type(type); - op->set_input(inputs); - op->set_output(outputs); - op->set_output_type(output_types); op->set_node_id(node_id); - op->set_mem_id(mem_ids); + + op->mutable_input()->Reserve(inputs.size()); + for (auto input : inputs) { + op->add_input(input); + } + op->mutable_output()->Reserve(outputs.size()); + for (auto output : outputs) { + op->add_output(output); + } + op->mutable_output_type()->Reserve(output_types.size()); + for (auto output_type : output_types) { + op->add_output_type(output_type); + } + op->mutable_mem_id()->Reserve(mem_ids.size()); + for (auto mem_id : mem_ids) { + op->add_mem_id(mem_id); + } } } // namespace @@ -53,6 +67,7 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { MACE_LATENCY_LOGGER(2, "Create operator {{ net.op[i].name }}"); mace::Argument *arg = nullptr; + op->mutable_arg()->Reserve({{ net.op[i].arg|length }}); {% for arg in net.op[i].arg %} arg = op->add_arg(); @@ -68,20 +83,32 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { arg->set_s({{ arg.s|tojson }}); {%- endif %} - {% if arg.floats|length != 0 %} - arg->set_floats({ {{ arg.floats|join(', ') }} }); - {% endif 
%} - {% if arg.ints|length != 0 %} - arg->set_ints({ {{ arg.ints|join(', ') }} }); - {% endif %} - {% if arg.strings|length != 0 %} - arg->set_strings({ {{ arg.strings|stringfy() }} }); - {% endif %} + arg->mutable_floats()->Reserve({{ arg.floats|length }}); + {% for float_value in arg.floats %} + arg->add_floats({{ float_value }}); + {% endfor %} + arg->mutable_ints()->Reserve({{ arg.ints|length }}); + {% for int_value in arg.ints %} + arg->add_ints({{ int_value }}); + {% endfor %} + arg->mutable_strings()->Reserve({{ arg.strings|length }}); + {% for str_value in arg.strings %} + arg->add_strings({{ str_value }}); + {% endfor %} + {% endfor %} + op->mutable_output_shape()->Reserve({{ net.op[i].output_shape|length }}); + mace::OutputShape * output_shape = nullptr; {% for shape in net.op[i].output_shape %} - {% if shape.dims | length > 0 %} - op->add_output_shape(mace::OutputShape({ {{ shape.dims|join(', ') }} })); + {% if shape.dims|length > 0 %} + output_shape = op->add_output_shape(); + + output_shape->mutable_dims()->Reserve({{ shape.dims|length }}); + {% for dim in shape.dims %} + output_shape->add_dims({{ dim }}); + {% endfor %} + {% endif %} {% endfor %} @@ -103,14 +130,19 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { std::vector input_node_ids({ {{ net.op[i].node_input | map(attribute='node_id') | join(', ') }} }); std::vector input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} }); - for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) { - mace::NodeInput input(input_node_ids[i], input_output_ports[i]); - op->add_node_input(input); + mace::NodeInput *node_input = nullptr; + op->mutable_node_input()->Reserve({{ net.op[i].node_input|length }}); + for (size_t i = 0; i < {{ net.op[i].node_input|length }}; ++i) { + node_input = op->add_node_input(); + node_input->set_node_id(input_node_ids[i]); + node_input->set_output_port(input_output_ports[i]); } {% endif %} + {% if net.op[i].out_max_byte_size | length 
> 0 %} std::vector out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }}; - for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) { + op->mutable_out_max_byte_size()->Reserve({{ net.op[i].out_max_byte_size|length }}); + for (size_t i = 0; i < {{ net.op[i].out_max_byte_size|length }}; ++i) { op->add_out_max_byte_size(out_max_byte_sizes[i]); } {% endif %} diff --git a/mace/python/tools/source_converter_lib.py b/mace/python/tools/source_converter_lib.py index 2adcd383f515b7aae88ca81ca376e1e865855e4d..145d1d4127801cfd4c9387ca4b4078be8a87d72d 100644 --- a/mace/python/tools/source_converter_lib.py +++ b/mace/python/tools/source_converter_lib.py @@ -14,122 +14,10 @@ import datetime import os -import uuid -import numpy as np -import hashlib from mace.proto import mace_pb2 from jinja2 import Environment, FileSystemLoader -GENERATED_NAME = set() - - -def generate_obfuscated_name(namespace, name): - md5 = hashlib.md5() - md5.update(namespace) - md5.update(name) - md5_digest = md5.hexdigest() - - name = md5_digest[:8] - while name in GENERATED_NAME: - name = md5_digest - assert name not in GENERATED_NAME - GENERATED_NAME.add(name) - return name - - -def generate_tensor_map(tensors): - tensor_map = {} - for t in tensors: - if t.name not in tensor_map: - tensor_map[t.name] = generate_obfuscated_name("tensor", t.name) - return tensor_map - - -def generate_in_out_map(ops, tensor_map): - in_out_map = {} - for op in ops: - op.name = generate_obfuscated_name("op", op.name) - for input_name in op.input: - if input_name not in in_out_map: - if input_name in tensor_map: - in_out_map[input_name] = tensor_map[input_name] - else: - in_out_map[input_name] = generate_obfuscated_name( - "in", input_name) - for output_name in op.output: - if output_name not in in_out_map: - if output_name in tensor_map: - in_out_map[output_name] = tensor_map[output_name] - else: - in_out_map[output_name] = generate_obfuscated_name( - "out", output_name) 
- return in_out_map - - -def obfuscate_name(net_def): - input_node = "mace_input_node" - output_node = "mace_output_node" - tensor_map = generate_tensor_map(net_def.tensors) - in_out_map = generate_in_out_map(net_def.op, tensor_map) - for t in net_def.tensors: - if input_node not in t.name and output_node not in t.name: - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if input_node not in op.input[i]: - op.input[i] = in_out_map[op.input[i]] - for i in range(len(op.output)): - if output_node not in op.output[i]: - op.output[i] = in_out_map[op.output[i]] - - -def normalize_op_name(op_name): - idx = op_name.rfind(':') - if idx == -1: - return op_name - else: - return op_name[:idx] - - -def rename_tensor(net_def): - tensor_map = {} - for t in net_def.tensors: - if t.name not in tensor_map: - tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/", - "_") - t.name = tensor_map[t.name] - for op in net_def.op: - for i in range(len(op.input)): - if op.input[i] in tensor_map: - op.input[i] = tensor_map[op.input[i]] - for i in range(len(op.output)): - if op.output[i] in tensor_map: - op.output[i] = tensor_map[op.output[i]] - - -class TensorInfo: - def __init__(self, id, t, runtime, gpu_data_type): - self.id = id - self.data_type = mace_pb2.DataType.Name(t.data_type) - if t.data_type == mace_pb2.DT_FLOAT: - if runtime == 'gpu' and gpu_data_type == 'half': - self.data_type = mace_pb2.DT_HALF - self.data = bytearray( - np.array(t.float_data).astype(np.float16).tobytes()) - else: - self.data_type = mace_pb2.DT_FLOAT - self.data = bytearray( - np.array(t.float_data).astype(np.float32).tobytes()) - elif t.data_type == mace_pb2.DT_INT32: - self.data = bytearray( - np.array(t.int32_data).astype(np.int32).tobytes()) - elif t.data_type == mace_pb2.DT_UINT8: - self.data = bytearray( - np.array(t.int32_data).astype(np.uint8).tolist()) - else: - raise Exception('Tensor data type %s not supported' % t.data_type) - def stringfy(value): 
return ', '.join('"{0}"'.format(w) for w in value) @@ -137,12 +25,8 @@ def stringfy(value): def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, obfuscate, model_tag, output, runtime, embed_model_data, - winograd_conv, gpu_data_type): - if obfuscate: - obfuscate_name(net_def) - else: - rename_tensor(net_def) - + winograd_conv, model_load_type, tensor_infos, + model_data): # Capture our current directory print template_dir @@ -153,82 +37,63 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, output_dir = os.path.dirname(output) + '/' # generate tensor source files template_name = 'tensor_source.jinja2' - model_data = [] - offset = 0 - counter = 0 - for t in net_def.tensors: - tensor_info = TensorInfo(counter, t, runtime, gpu_data_type) - # align - if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: - padding = 4 - offset % 4 - model_data.extend(bytearray([0] * padding)) - offset += padding + for i in range(len(net_def.tensors)): + if model_load_type == 'source': + source = j2_env.get_template(template_name).render( + tensor_info=tensor_infos[i], + tensor=net_def.tensors[i], + tag=model_tag, + ) + with open(output_dir + 'tensor' + str(i) + '.cc', "wb") as f: + f.write(source) + + if model_load_type == 'source': + # generate tensor data + template_name = 'tensor_data.jinja2' source = j2_env.get_template(template_name).render( - tensor_info=tensor_info, - tensor=t, tag=model_tag, - offset=offset, - ) - model_data.extend(tensor_info.data) - offset += len(tensor_info.data) - with open(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f: + embed_model_data=embed_model_data, + model_data_size=len(model_data), + model_data=model_data) + with open(output_dir + 'tensor_data' + '.cc', "wb") as f: f.write(source) - counter += 1 - - # generate tensor data - template_name = 'tensor_data.jinja2' - source = j2_env.get_template(template_name).render( - tag=model_tag, - embed_model_data=embed_model_data, - 
model_data_size=offset, - model_data=model_data) - with open(output_dir + 'tensor_data' + '.cc', "wb") as f: - f.write(source) - if not embed_model_data: - with open(output_dir + model_tag + '.data', "wb") as f: - f.write(bytearray(model_data)) - # generate op source files - template_name = 'operator.jinja2' - counter = 0 - op_size = len(net_def.op) - for start in range(0, op_size, 10): + # generate op source files + template_name = 'operator.jinja2' + counter = 0 + op_size = len(net_def.op) + for start in range(0, op_size, 10): + source = j2_env.get_template(template_name).render( + start=start, + end=min(start + 10, op_size), + net=net_def, + tag=model_tag, + runtime=runtime, + ) + with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f: + f.write(source) + counter += 1 + + # generate model source files + build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + template_name = 'model.jinja2' + checksum = model_checksum + if weight_checksum is not None: + checksum = "{},{}".format(model_checksum, weight_checksum) source = j2_env.get_template(template_name).render( - start=start, - end=min(start + 10, op_size), net=net_def, tag=model_tag, runtime=runtime, - ) - with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f: + obfuscate=obfuscate, + embed_model_data=embed_model_data, + winograd_conv=winograd_conv, + checksum=checksum, + build_time=build_time) + with open(output, "wb") as f: f.write(source) - counter += 1 - # generate model source files - build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - template_name = 'model.jinja2' - tensors = [ - TensorInfo(i, net_def.tensors[i], runtime, gpu_data_type) - for i in range(len(net_def.tensors)) - ] - checksum = model_checksum - if weight_checksum is not None: - checksum = "{},{}".format(model_checksum, weight_checksum) - source = j2_env.get_template(template_name).render( - tensors=tensors, - net=net_def, - tag=model_tag, - runtime=runtime, - obfuscate=obfuscate, - 
embed_model_data=embed_model_data, - winograd_conv=winograd_conv, - checksum=checksum, - build_time=build_time) - with open(output, "wb") as f: - f.write(source) - - # generate model header file - template_name = 'model_header.jinja2' - source = j2_env.get_template(template_name).render(tag=model_tag, ) - with open(output_dir + model_tag + '.h', "wb") as f: - f.write(source) + # generate model header file + template_name = 'model_header.jinja2' + source = j2_env.get_template(template_name).render(tag=model_tag, ) + with open(output_dir + model_tag + '.h', "wb") as f: + f.write(source) diff --git a/mace/python/tools/tensor_data.jinja2 b/mace/python/tools/tensor_data.jinja2 index 0f00eb3b48afa01888fc834e161b84982928fe94..ce3ceb81d9813fb391a9d06fd846ce0ca62899a2 100644 --- a/mace/python/tools/tensor_data.jinja2 +++ b/mace/python/tools/tensor_data.jinja2 @@ -22,17 +22,13 @@ #include "mace/utils/env_time.h" #include "mace/utils/logging.h" -{% if not embed_model_data %} +namespace mace { -#include -#include -#include -#include -#include +extern const unsigned char *LoadModelData(const std::string &model_data_file, + const size_t &data_size); +extern void UnloadModelData(const unsigned char *model_data, + const size_t &data_size); -{% endif %} - -namespace mace { namespace {{tag}} { {% if embed_model_data %} @@ -41,34 +37,18 @@ alignas(4) const unsigned char model_data[{{ model_data_size }}] = { }; {% endif %} -const unsigned char *LoadModelData(const char *model_data_file) { +const unsigned char *LoadModelData(const std::string &model_data_file) { {% if embed_model_data %} MACE_UNUSED(model_data_file); return model_data; {% else %} - int fd = open(model_data_file, O_RDONLY); - MACE_CHECK(fd >= 0, "Failed to open model data file ", - model_data_file, ", error code: ", errno); - - const unsigned char *model_data = - static_cast(mmap(nullptr, {{ model_data_size }}, - PROT_READ, MAP_PRIVATE, fd, 0)); - MACE_CHECK(model_data != MAP_FAILED, "Failed to map model data file ", - 
model_data_file, ", error code: ", errno); - - int ret = close(fd); - MACE_CHECK(ret == 0, "Failed to close model data file ", - model_data_file, ", error code: ", errno); - - return model_data; + return mace::LoadModelData(model_data_file, {{ model_data_size }}); {% endif %} } void UnloadModelData(const unsigned char *model_data) { {% if not embed_model_data %} - int ret = munmap(const_cast(model_data), - {{ model_data_size }}); - MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno); + mace::UnloadModelData(model_data, {{ model_data_size }}); {% else %} MACE_UNUSED(model_data); {% endif %} diff --git a/mace/python/tools/tensor_source.jinja2 b/mace/python/tools/tensor_source.jinja2 index 42a8f16ef877e9c2208dc298ed33cc40fc546bb8..e982e41f45c7fe02a3eda75e761e14fe30d1eb51 100644 --- a/mace/python/tools/tensor_source.jinja2 +++ b/mace/python/tools/tensor_source.jinja2 @@ -17,6 +17,7 @@ #include #include +#include "mace/proto/mace.pb.h" #include "mace/public/mace.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" @@ -24,12 +25,16 @@ namespace mace { namespace {{tag}} { -void CreateTensor{{tensor_info.id}}(std::vector *tensors, - const unsigned char *model_data) { +void CreateTensor{{tensor_info.id}}(mace::ConstTensor *const_tensor) { MACE_LATENCY_LOGGER(2, "Create tensor {{ tensor.name }}"); - tensors->emplace_back(mace::ConstTensor( - {{ tensor.name|tojson }}, model_data + {{ offset }}, - { {{ tensor.dims|join(', ') }} }, {{ tensor_info.data_type }}, {{ tensor.node_id }})); + const_tensor->set_name({{ tensor.name|tojson }}); + const_tensor->set_offset({{ tensor.offset }}); + const_tensor->set_data_size({{ tensor.data_size }}); + {% for dim in tensor.dims %} + const_tensor->add_dims({{ dim }}); + {% endfor %} + const_tensor->set_data_type(static_cast({{ tensor_info.data_type }})); + const_tensor->set_node_id({{ tensor.node_id }}); } } // namespace {{tag}} diff --git a/mace/python/tools/tensor_util.py 
b/mace/python/tools/tensor_util.py new file mode 100644 index 0000000000000000000000000000000000000000..61a5e90cf0a8a73c73bd092dfc340760e493ab18 --- /dev/null +++ b/mace/python/tools/tensor_util.py @@ -0,0 +1,173 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import numpy as np + +from mace.proto import mace_pb2 + +GENERATED_NAME = set() + + +def generate_obfuscated_name(namespace, name): + md5 = hashlib.md5() + md5.update(namespace) + md5.update(name) + md5_digest = md5.hexdigest() + + name = md5_digest[:8] + while name in GENERATED_NAME: + name = md5_digest + assert name not in GENERATED_NAME + GENERATED_NAME.add(name) + return name + + +def generate_tensor_map(tensors): + tensor_map = {} + for t in tensors: + if t.name not in tensor_map: + tensor_map[t.name] = generate_obfuscated_name("tensor", t.name) + return tensor_map + + +def generate_in_out_map(ops, tensor_map): + in_out_map = {} + for op in ops: + op.name = generate_obfuscated_name("op", op.name) + for input_name in op.input: + if input_name not in in_out_map: + if input_name in tensor_map: + in_out_map[input_name] = tensor_map[input_name] + else: + in_out_map[input_name] = generate_obfuscated_name( + "in", input_name) + for output_name in op.output: + if output_name not in in_out_map: + if output_name in tensor_map: + in_out_map[output_name] = tensor_map[output_name] + else: + in_out_map[output_name] = generate_obfuscated_name( + 
"out", output_name) + return in_out_map + + +def obfuscate_name(net_def): + input_node = "mace_input_node" + output_node = "mace_output_node" + tensor_map = generate_tensor_map(net_def.tensors) + in_out_map = generate_in_out_map(net_def.op, tensor_map) + for t in net_def.tensors: + if input_node not in t.name and output_node not in t.name: + t.name = tensor_map[t.name] + for op in net_def.op: + for i in range(len(op.input)): + if input_node not in op.input[i]: + op.input[i] = in_out_map[op.input[i]] + for i in range(len(op.output)): + if output_node not in op.output[i]: + op.output[i] = in_out_map[op.output[i]] + + +def normalize_op_name(op_name): + idx = op_name.rfind(':') + if idx == -1: + return op_name + else: + return op_name[:idx] + + +def rename_tensor(net_def): + tensor_map = {} + for t in net_def.tensors: + if t.name not in tensor_map: + tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/", + "_") + t.name = tensor_map[t.name] + for op in net_def.op: + for i in range(len(op.input)): + if op.input[i] in tensor_map: + op.input[i] = tensor_map[op.input[i]] + for i in range(len(op.output)): + if op.output[i] in tensor_map: + op.output[i] = tensor_map[op.output[i]] + + +class TensorInfo: + def __init__(self, id, t, runtime, gpu_data_type): + self.id = id + self.data_type = mace_pb2.DataType.Name(t.data_type) + if t.data_type == mace_pb2.DT_FLOAT: + if runtime == 'gpu' and gpu_data_type == 'half': + self.data_type = mace_pb2.DT_HALF + self.data = bytearray( + np.array(t.float_data).astype(np.float16).tobytes()) + else: + self.data_type = mace_pb2.DT_FLOAT + self.data = bytearray( + np.array(t.float_data).astype(np.float32).tobytes()) + elif t.data_type == mace_pb2.DT_INT32: + self.data = bytearray( + np.array(t.int32_data).astype(np.int32).tobytes()) + elif t.data_type == mace_pb2.DT_UINT8: + self.data = bytearray( + np.array(t.int32_data).astype(np.uint8).tolist()) + else: + raise Exception('Tensor data type %s not supported' % t.data_type) + + +def 
get_tensor_info_and_model_data(net_def, runtime, gpu_data_type): + model_data = [] + offset = 0 + counter = 0 + tensor_infos = [] + for t in net_def.tensors: + tensor_info = TensorInfo(counter, t, runtime, gpu_data_type) + tensor_infos.append(tensor_info) + # align + if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: + padding = 4 - offset % 4 + model_data.extend(bytearray([0] * padding)) + offset += padding + + if t.data_type == mace_pb2.DT_FLOAT: + t.data_size = len(t.float_data) + elif t.data_type == mace_pb2.DT_INT32: + t.data_size = len(t.int32_data) + elif t.data_type == mace_pb2.DT_UINT8: + t.data_size = len(t.int32_data) + t.offset = offset + + counter += 1 + model_data.extend(tensor_info.data) + offset += len(tensor_info.data) + + return tensor_infos, model_data + + +def del_tensor_data(net_def, runtime, gpu_data_type): + for t in net_def.tensors: + if t.data_type == mace_pb2.DT_FLOAT: + del t.float_data[:] + elif t.data_type == mace_pb2.DT_INT32: + del t.int32_data[:] + elif t.data_type == mace_pb2.DT_UINT8: + del t.int32_data[:] + + +def update_tensor_data_type(net_def, runtime, gpu_data_type): + for t in net_def.tensors: + if t.data_type == mace_pb2.DT_FLOAT and runtime == 'gpu' \ + and gpu_data_type == 'half': + t.data_type = mace_pb2.DT_HALF diff --git a/mace/test/BUILD b/mace/test/BUILD index c76287f91ffd9db8bb781ea2bd502dcbaa93b321..afc2738aa49aa05335d333ea04b8ab379c221f48 100644 --- a/mace/test/BUILD +++ b/mace/test/BUILD @@ -13,7 +13,8 @@ cc_test( name = "mace_api_test", testonly = 1, srcs = ["mace_api_test.cc"], - copts = if_openmp_enabled(["-fopenmp"]) + + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + @@ -33,7 +34,8 @@ cc_test( name = "mace_api_mt_test", testonly = 1, srcs = ["mace_api_mt_test.cc"], - copts = if_openmp_enabled(["-fopenmp"]) + + copts = 
["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + + if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfloat-abi=softfp"]) + diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index a4088930dcc57ab1f2ed4976195519747a1810b8..ab4317d45fb9f5ae46c27c47de6d05b5680c8be6 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -69,8 +69,10 @@ void BufferToImage(const std::string &input_name, .AddIntArg("mode", mode) .Finalize(&operator_def); - operator_def.set_mem_id(mem_ids); - + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); + for (auto mem_id : mem_ids) { + operator_def.add_mem_id(mem_id); + } net_def->add_op()->CopyFrom(operator_def); } @@ -112,7 +114,10 @@ void Conv3x3(const std::string &input_name, .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); - operator_def.set_mem_id(mem_ids); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); + for (auto mem_id : mem_ids) { + operator_def.add_mem_id(mem_id); + } net_def->add_op()->CopyFrom(operator_def); } @@ -136,20 +141,25 @@ void Relu(const std::string &input_name, template void AddTensor(const std::string &name, const std::vector &shape, - T *data, + const int offset, + const int data_size, NetDef *net_def) { - ConstTensor tensor(name, - reinterpret_cast(data), - shape, - DataTypeToEnum::value); - - net_def->mutable_tensors().push_back(tensor); + ConstTensor *tensor_ptr = net_def->add_tensors(); + tensor_ptr->set_name(name); + tensor_ptr->mutable_dims()->Reserve(shape.size()); + for (auto dim : shape) { + tensor_ptr->add_dims(dim); + } + tensor_ptr->set_offset(offset); + tensor_ptr->set_data_size(data_size); + tensor_ptr->set_data_type(DataTypeToEnum::value); } template void CheckOutputs(const NetDef &net_def, const std::map &inputs, - const std::map &outputs) { + const std::map &outputs, + const std::vector &tensor_data) { ops::test::OpsTestNet 
net; for (auto input : inputs) { auto input_shape = input.second.shape(); @@ -166,13 +176,14 @@ void CheckOutputs(const NetDef &net_def, } auto tensors = net_def.tensors(); for (auto tensor : tensors) { - auto shape = tensor.dims(); + std::vector shape = {tensor.dims().begin(), tensor.dims().end()}; const int64_t data_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); std::vector data(data_size); - memcpy(data.data(), reinterpret_cast(tensor.data()), - data_size * sizeof(T)); + memcpy(data.data(), + reinterpret_cast(tensor_data.data()) + tensor.offset(), + tensor.data_size() * sizeof(T)); net.AddInputFromArray(tensor.name(), shape, data); } net.RunNet(net_def, D); @@ -217,9 +228,14 @@ std::map AddMemoryOptimization( input_shapes[i][1]); } size_t input_size = input_names.size(); + size_t output_size = output_names.size(); + MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); + mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size); for (size_t i = 0; i < input_size; ++i) { - net_def->mutable_mem_arena().mutable_mem_block().push_back( - MemoryBlock(mem_id, in_mem_block_x, in_mem_block_y)); + MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); + mem_blk_ptr->set_mem_id(mem_id); + mem_blk_ptr->set_x(in_mem_block_x); + mem_blk_ptr->set_y(in_mem_block_y); res[input_names[i]] = mem_id; mem_id++; } @@ -234,10 +250,11 @@ std::map AddMemoryOptimization( output_shapes[i][0] * output_shapes[i][1]); } - size_t output_size = output_names.size(); for (size_t i = 0; i < output_size; ++i) { - net_def->mutable_mem_arena().mutable_mem_block().push_back( - MemoryBlock(mem_id, out_mem_block_x, out_mem_block_y)); + MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); + mem_blk_ptr->set_mem_id(mem_id); + mem_blk_ptr->set_x(out_mem_block_x); + mem_blk_ptr->set_y(out_mem_block_y); res[output_names[i]] = mem_id; mem_id++; } @@ -261,16 +278,17 @@ void MaceRunFunc(const int in_out_size) { const std::vector> output_shapes = {{1, 32, 32, 
16}}; const std::vector filter_shape = {16, 16, 3, 3}; - NetDef net_def; + std::shared_ptr net_def(new NetDef()); // Add memory optimization auto mem_map = AddMemoryOptimization(input_names, output_names, input_shapes, output_shapes, - &net_def); + net_def.get()); std::vector data; ops::test::GenerateRandomRealTypeData(filter_shape, &data); - AddTensor(filter_tensor_name, filter_shape, data.data(), &net_def); + AddTensor( + filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { std::string input_name = MakeString("mace_input_node_", @@ -279,16 +297,16 @@ void MaceRunFunc(const int in_out_size) { mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, device, - &net_def); + net_def.get()); } BufferToImage(filter_tensor_name, filter_tensor_img_name, mace::kernels::CONV2D_FILTER, {}, device, - &net_def, NetMode::INIT); + net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, device, - &net_def); + net_def.get()); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", @@ -296,7 +314,7 @@ void MaceRunFunc(const int in_out_size) { ImageToBuffer(output_names[i], output_name, mace::kernels::IN_OUT_CHANNEL, device, - &net_def); + net_def.get()); } const std::string file_path ="/data/local/tmp/mace"; @@ -305,7 +323,8 @@ void MaceRunFunc(const int in_out_size) { mace::SetKVStorageFactory(storage_factory); MaceEngine engine(device); - MaceStatus status = engine.Init(&net_def, input_names, output_names); + MaceStatus status = engine.Init(net_def.get(), input_names, output_names, + reinterpret_cast(data.data())); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; @@ -322,7 +341,7 @@ void MaceRunFunc(const int in_out_size) { } } - CheckOutputs(net_def, inputs, outputs); + CheckOutputs(*net_def, inputs, outputs, data); } } // namespace 
diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index 0b16c762da94f2379fc2eaf4d50eafd4d9873cc0..f061ecc3f0864019ec1ea8efb4569aebe3ed49e0 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -79,7 +79,10 @@ void BufferToImage(const std::string &input_name, .AddIntArg("mode", mode) .Finalize(&operator_def); - operator_def.set_mem_id(mem_ids); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); + for (auto mem_id : mem_ids) { + operator_def.add_mem_id(mem_id); + } net_def->add_op()->CopyFrom(operator_def); } @@ -122,7 +125,10 @@ void Conv3x3(const std::string &input_name, .AddIntArg("device", static_cast(device_type)) .Finalize(&operator_def); - operator_def.set_mem_id(mem_ids); + operator_def.mutable_mem_id()->Reserve(mem_ids.size()); + for (auto mem_id : mem_ids) { + operator_def.add_mem_id(mem_id); + } net_def->add_op()->CopyFrom(operator_def); } @@ -146,20 +152,25 @@ void Relu(const std::string &input_name, template void AddTensor(const std::string &name, const std::vector &shape, - T *data, + const int offset, + const int data_size, NetDef *net_def) { - ConstTensor tensor(name, - reinterpret_cast(data), - shape, - DataTypeToEnum::value); - - net_def->mutable_tensors().push_back(tensor); + ConstTensor *tensor_ptr = net_def->add_tensors(); + tensor_ptr->set_name(name); + tensor_ptr->mutable_dims()->Reserve(shape.size()); + for (auto dim : shape) { + tensor_ptr->add_dims(dim); + } + tensor_ptr->set_offset(offset); + tensor_ptr->set_data_size(data_size); + tensor_ptr->set_data_type(DataTypeToEnum::value); } template void CheckOutputs(const NetDef &net_def, const std::map &inputs, - const std::map &outputs) { + const std::map &outputs, + const std::vector &tensor_data) { ops::test::OpsTestNet net; for (auto input : inputs) { auto input_shape = input.second.shape(); @@ -176,13 +187,14 @@ void CheckOutputs(const NetDef &net_def, } auto tensors = net_def.tensors(); for (auto tensor : tensors) { - auto shape = 
tensor.dims(); + std::vector shape = {tensor.dims().begin(), tensor.dims().end()}; const int64_t data_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); std::vector data(data_size); - memcpy(data.data(), reinterpret_cast(tensor.data()), - data_size * sizeof(T)); + memcpy(data.data(), + reinterpret_cast(tensor_data.data()) + tensor.offset(), + tensor.data_size() * sizeof(T)); net.AddInputFromArray(tensor.name(), shape, data); } net.RunNet(net_def, D); @@ -227,9 +239,14 @@ std::map AddMemoryOptimization( input_shapes[i][1]); } size_t input_size = input_names.size(); + size_t output_size = output_names.size(); + MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena(); + mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size); for (size_t i = 0; i < input_size; ++i) { - net_def->mutable_mem_arena().mutable_mem_block().push_back( - MemoryBlock(mem_id, in_mem_block_x, in_mem_block_y)); + MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); + mem_blk_ptr->set_mem_id(mem_id); + mem_blk_ptr->set_x(in_mem_block_x); + mem_blk_ptr->set_y(in_mem_block_y); res[input_names[i]] = mem_id; mem_id++; } @@ -244,10 +261,11 @@ std::map AddMemoryOptimization( output_shapes[i][0] * output_shapes[i][1]); } - size_t output_size = output_names.size(); for (size_t i = 0; i < output_size; ++i) { - net_def->mutable_mem_arena().mutable_mem_block().push_back( - MemoryBlock(mem_id, out_mem_block_x, out_mem_block_y)); + MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block(); + mem_blk_ptr->set_mem_id(mem_id); + mem_blk_ptr->set_x(out_mem_block_x); + mem_blk_ptr->set_y(out_mem_block_y); res[output_names[i]] = mem_id; mem_id++; } @@ -271,16 +289,16 @@ void MaceRun(const int in_out_size, const DeviceType device = DeviceType::GPU; - NetDef net_def; + std::shared_ptr net_def(new NetDef()); // Add memory optimization auto mem_map = AddMemoryOptimization(input_names, output_names, input_shapes, output_shapes, - &net_def); + net_def.get()); std::vector data; 
ops::test::GenerateRandomRealTypeData(filter_shape, &data); - AddTensor(filter_tensor_name, filter_shape, data.data(), &net_def); + AddTensor(filter_tensor_name, filter_shape, 0, data.size(), net_def.get()); for (size_t i = 0; i < input_names.size(); ++i) { std::string input_name = MakeString("mace_input_node_", @@ -289,15 +307,15 @@ void MaceRun(const int in_out_size, mace::kernels::IN_OUT_CHANNEL, {mem_map[input_names[i]]}, device, - &net_def); + net_def.get()); } BufferToImage(filter_tensor_name, filter_tensor_img_name, mace::kernels::CONV2D_FILTER, {}, device, - &net_def, NetMode::INIT); + net_def.get(), NetMode::INIT); for (size_t i = 0; i < output_names.size(); ++i) { Conv3x3(input_names[i], filter_tensor_img_name, output_names[i], {mem_map[output_names[i]]}, - device, &net_def); + device, net_def.get()); } for (size_t i = 0; i < output_names.size(); ++i) { std::string output_name = MakeString("mace_output_node_", @@ -305,11 +323,12 @@ void MaceRun(const int in_out_size, ImageToBuffer(output_names[i], output_name, mace::kernels::IN_OUT_CHANNEL, device, - &net_def); + net_def.get()); } MaceEngine engine(device); - MaceStatus status = engine.Init(&net_def, input_names, output_names); + MaceStatus status = engine.Init(net_def.get(), input_names, output_names, + reinterpret_cast(data.data())); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; @@ -326,7 +345,7 @@ void MaceRun(const int in_out_size, } } - CheckOutputs(net_def, inputs, outputs); + CheckOutputs(*net_def, inputs, outputs, data); } } // namespace diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index e912071525b4e3adef108ae5ff55301c0792dd9b..38f18af5a5dd425ee0888ea6450b7494c16ca594 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -37,6 +36,7 @@ #include "mace/public/mace_runtime.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" 
+#include "mace/utils/utils.h" #ifdef MACE_ENABLE_OPENCL #include "mace/core/runtime/opencl/opencl_runtime.h" @@ -186,7 +186,10 @@ DEFINE_string(output_file, "output file name | output file prefix for multiple outputs"); DEFINE_string(model_data_file, "", - "model data file name, used when EMBED_MODEL_DATA set to 0"); + "model data file name, used when EMBED_MODEL_DATA set to 0 or 2"); +DEFINE_string(model_file, + "", + "model file name, used when load mace model in pb"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); @@ -228,22 +231,26 @@ bool RunModel(const std::string &model_name, MaceStatus create_engine_status; // Create Engine int64_t t0 = NowMicros(); - if (FLAGS_model_data_file.empty()) { + if (FLAGS_model_file != "") { + std::vector model_pb_data; + if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) { + LOG(FATAL) << "Failed to read file: " << FLAGS_model_file; + } create_engine_status = - CreateMaceEngine(model_name.c_str(), - nullptr, - input_names, - output_names, - device_type, - &engine); + CreateMaceEngineFromProto(model_pb_data, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); } else { create_engine_status = - CreateMaceEngine(model_name.c_str(), - FLAGS_model_data_file.c_str(), - input_names, - output_names, - device_type, - &engine); + CreateMaceEngineFromCode(model_name, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); } int64_t t1 = NowMicros(); @@ -358,6 +365,7 @@ int Main(int argc, char **argv) { LOG(INFO) << "input_file: " << FLAGS_input_file; LOG(INFO) << "output_file: " << FLAGS_output_file; LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; + LOG(INFO) << "model_file: " << FLAGS_model_file; LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "round: " << FLAGS_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round; diff --git a/mace/utils/BUILD b/mace/utils/BUILD 
index 5af9ad607b2338f26018e719d4587a80a0ad1fdd..76e8864ad798e52d3fb6104c485e778b11fab651 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -28,6 +28,7 @@ cc_library( linkopts = if_android([ "-llog", ]), + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/public", ], @@ -38,6 +39,7 @@ cc_library( srcs = [ "tuner_development.cc", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ ":utils", ], @@ -48,6 +50,7 @@ cc_library( srcs = [ "tuner_production.cc", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ ":utils", "//mace/codegen:generated_tuning_params", @@ -60,6 +63,7 @@ cc_test( srcs = [ "tuner_test.cc", ], + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], linkopts = if_android([ "-pie", "-lm", # Required by unordered_map diff --git a/mace/utils/logging.h b/mace/utils/logging.h index 0cb8f51f964f6739447b4139d23c02efb0cf5ddd..1f540fa692d793a69230a1f564104a0a1eba5bb2 100644 --- a/mace/utils/logging.h +++ b/mace/utils/logging.h @@ -22,9 +22,9 @@ #include #include "mace/public/mace.h" -#include "mace/public/mace_types.h" #include "mace/utils/env_time.h" #include "mace/utils/string_util.h" +#include "mace/utils/utils.h" #undef ERROR diff --git a/mace/utils/utils.h b/mace/utils/utils.h index 9085006061762d8304303f4258eba503947b60ec..391bd2d9deb034f1c8ad97c27ff161055ac7b088 100644 --- a/mace/utils/utils.h +++ b/mace/utils/utils.h @@ -15,12 +15,22 @@ #ifndef MACE_UTILS_UTILS_H_ #define MACE_UTILS_UTILS_H_ +#include #include #include #include #include namespace mace { + +// Disable the copy and assignment operator for a class. 
+#ifndef DISABLE_COPY_AND_ASSIGN +#define DISABLE_COPY_AND_ASSIGN(classname) \ + private: \ + classname(const classname &) = delete; \ + classname &operator=(const classname &) = delete +#endif + template Integer RoundUp(Integer i, Integer factor) { return (i + factor - 1) / factor * factor; @@ -121,5 +131,26 @@ inline std::vector Split(const std::string &str, char delims) { return result; } +inline bool ReadBinaryFile(std::vector *data, + const std::string &filename) { + std::ifstream ifs(filename, std::ios::in | std::ios::binary); + if (!ifs.is_open()) { + return false; + } + ifs.seekg(0, ifs.end); + size_t length = ifs.tellg(); + ifs.seekg(0, ifs.beg); + + data->reserve(length); + data->insert(data->begin(), std::istreambuf_iterator(ifs), + std::istreambuf_iterator()); + if (ifs.fail()) { + return false; + } + ifs.close(); + + return true; +} + } // namespace mace #endif // MACE_UTILS_UTILS_H_ diff --git a/tools/mace_tools.py b/tools/mace_tools.py index af0597e132256fa9389a8e9f50c31ffb49f19ccd..7bc15c7dac93f03a4596b4b152707c9222a6e87e 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -159,6 +159,7 @@ def tuning_run(target_abi, output_nodes, input_shapes, output_shapes, + mace_model_dir, model_name, device_type, running_round, @@ -181,6 +182,7 @@ def tuning_run(target_abi, output_nodes, input_shapes, output_shapes, + mace_model_dir, model_name, device_type, running_round, @@ -203,12 +205,12 @@ def tuning_run(target_abi, stdout, target_abi, serialno, model_name, device_type) -def build_mace_run_prod(hexagon_mode, runtime, target_abi, - serialno, vlog_level, embed_model_data, +def build_mace_run_prod(hexagon_mode, runtime, target_abi, serialno, + vlog_level, embed_model_data, model_load_type, model_output_dir, input_nodes, output_nodes, - input_shapes, output_shapes, model_name, device_type, - running_round, restart_round, tuning, - limit_opencl_kernel_time, phone_data_dir, + input_shapes, output_shapes, mace_model_dir, + model_name, device_type, 
running_round, restart_round, + tuning, limit_opencl_kernel_time, phone_data_dir, enable_openmp): mace_run_target = "//mace/tools/validation:mace_run" strip = "always" @@ -226,14 +228,14 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, + sh_commands.update_mace_run_lib(model_output_dir, model_load_type, model_name, embed_model_data) device_type = parse_device_type("gpu") tuning_run(target_abi, serialno, vlog_level, embed_model_data, model_output_dir, input_nodes, output_nodes, input_shapes, - output_shapes, model_name, device_type, running_round=0, - restart_round=1, out_of_range_check=False, + output_shapes, mace_model_dir, model_name, device_type, + running_round=0, restart_round=1, out_of_range_check=False, phone_data_dir=phone_data_dir, tuning=tuning, limit_opencl_kernel_time=limit_opencl_kernel_time) @@ -248,7 +250,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, + sh_commands.update_mace_run_lib(model_output_dir, model_load_type, model_name, embed_model_data) else: gen_opencl_and_tuning_code(target_abi, serialno, [], False) @@ -261,7 +263,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, debug=debug, enable_openmp=enable_openmp ) - sh_commands.update_mace_run_lib(model_output_dir, + sh_commands.update_mace_run_lib(model_output_dir, model_load_type, model_name, embed_model_data) @@ -271,17 +273,21 @@ def merge_libs_and_tuning_results(target_soc, project_name, output_dir, model_output_dirs, + mace_model_dirs_kv, + model_load_type, hexagon_mode, embed_model_data): gen_opencl_and_tuning_code( target_abi, serialno, model_output_dirs, False) - sh_commands.build_production_code(target_abi) + sh_commands.build_production_code(model_load_type, target_abi) sh_commands.merge_libs(target_soc, target_abi, project_name, output_dir, 
model_output_dirs, + mace_model_dirs_kv, + model_load_type, hexagon_mode, embed_model_data) @@ -366,6 +372,9 @@ def parse_model_configs(): print("CONFIG ERROR:") print("embed_model_data must be integer in range [0, 1]") exit(1) + elif FLAGS.model_load_type == "pb": + configs["embed_model_data"] = 0 + print("emebed_model_data is set 0") model_names = configs.get("models", "") if not model_names: @@ -523,6 +532,12 @@ def parse_args(): type=str, default="cpu", help="validation runtime.") + parser.add_argument( + "--model_load_type", + type=str, + default="source", + help="[source|pb] Load models in generated `source` code" + + "or `pb` file.") parser.add_argument( "--gpu_data_type", type=str, @@ -532,9 +547,11 @@ def parse_args(): def process_models(project_name, configs, embed_model_data, vlog_level, - target_abi, phone_data_dir, target_soc="", serialno=""): + target_abi, phone_data_dir, model_load_type, + target_soc="", serialno=""): hexagon_mode = get_hexagon_mode(configs) model_output_dirs = [] + mace_model_dirs_kv = {} for model_name in configs["models"]: print '===================', model_name, '===================' @@ -550,6 +567,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, model_output_base_dir = "%s/%s/%s/%s/%s" % ( FLAGS.output_dir, project_name, "build", model_name, model_path_digest) + if model_load_type == "pb": + mace_model_dir = model_output_base_dir + mace_model_dirs_kv[model_name] = mace_model_dir + else: + mace_model_dir = "" if target_abi == "host": model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) @@ -587,11 +609,13 @@ def process_models(project_name, configs, embed_model_data, vlog_level, serialno, vlog_level, embed_model_data, + model_load_type, model_output_dir, model_config["input_nodes"], model_config["output_nodes"], model_config["input_shapes"], model_config["output_shapes"], + mace_model_dir, model_name, model_device_type, FLAGS.round, @@ -617,6 +641,7 @@ def process_models(project_name, 
configs, embed_model_data, vlog_level, model_config["output_nodes"], model_config["input_shapes"], model_config["output_shapes"], + mace_model_dir, model_name, run_device_type, FLAGS.round, @@ -636,6 +661,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, vlog_level, embed_model_data, model_output_dir, + mace_model_dir, model_config["input_nodes"], model_config["output_nodes"], model_config["input_shapes"], @@ -672,6 +698,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level, project_name, FLAGS.output_dir, model_output_dirs, + mace_model_dirs_kv, + model_load_type, hexagon_mode, embed_model_data) @@ -732,7 +760,8 @@ def main(unused_args): # generate source sh_commands.gen_mace_version() sh_commands.gen_encrypted_opencl_source() - sh_commands.gen_mace_engine_factory_source(configs['models'].keys()) + sh_commands.gen_mace_engine_factory_source(configs['models'].keys(), + FLAGS.model_load_type) embed_model_data = configs["embed_model_data"] target_socs = get_target_socs(configs) @@ -784,6 +813,8 @@ def main(unused_args): embed_model_data, model_config["fast_conv"], model_config["obfuscate"], + model_output_base_dir, + FLAGS.model_load_type, FLAGS.gpu_data_type) for target_abi in configs["target_abis"]: @@ -802,12 +833,14 @@ def main(unused_args): props["ro.product.model"])) process_models(project_name, configs, embed_model_data, vlog_level, target_abi, phone_data_dir, - target_soc, serialno) + FLAGS.model_load_type, target_soc, + serialno) else: print("====================================================") print("Run on host") process_models(project_name, configs, embed_model_data, - vlog_level, target_abi, phone_data_dir) + vlog_level, target_abi, phone_data_dir, + FLAGS.model_load_type) if FLAGS.mode == "build" or FLAGS.mode == "all": sh_commands.packaging_lib(FLAGS.output_dir, project_name) diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 
f8463cc92c77d4f07b0c264f2a71413ec4e584cd..065d40f057a5f9fab179415c83b8dfd4fe892f5d 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -287,9 +287,6 @@ def bazel_build(target, target, "--copt=-std=c++11", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", - "--copt=-Werror", - "--copt=-Wextra", - "--copt=-Wno-missing-field-initializers", "--copt=-O3", "--define", "openmp=%s" % str(enable_openmp).lower(), @@ -316,9 +313,6 @@ def bazel_build(target, "--cpu=%s" % abi, "--copt=-std=c++11", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", - "--copt=-Werror", - "--copt=-Wextra", - "--copt=-Wno-missing-field-initializers", "--copt=-DMACE_OBFUSCATE_LITERALS", "--copt=-O3", "--define", @@ -375,7 +369,9 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): "mace/codegen/opencl/opencl_encrypt_program.cc") -def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"): +def gen_mace_engine_factory_source(model_tags, + model_load_type, + codegen_path="mace/codegen"): print("* Genearte mace engine creator source") codegen_tools_dir = "%s/engine" % codegen_path sh.rm("-rf", codegen_tools_dir) @@ -383,6 +379,7 @@ def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"): gen_mace_engine_factory( model_tags, "mace/python/tools", + model_load_type, codegen_tools_dir) print("Genearte mace engine creator source done!\n") @@ -472,12 +469,16 @@ def gen_model_code(model_codegen_dir, embed_model_data, fast_conv, obfuscate, + model_output_dir, + model_load_type, gpu_data_type): print("* Genearte model code") bazel_build_common("//mace/python/tools:converter") + if os.path.exists(model_codegen_dir): sh.rm("-rf", model_codegen_dir) sh.mkdir("-p", model_codegen_dir) + stdout_buff = [] process_output = make_output_processor(stdout_buff) p = sh.python("bazel-bin/mace/python/tools/converter", @@ -486,11 +487,9 @@ def gen_model_code(model_codegen_dir, "--model_file=%s" % model_file_path, "--weight_file=%s" % weight_file_path, "--model_checksum=%s" % 
model_sha256_checksum, - "--output=%s" % model_codegen_dir + "/model.cc", "--input_node=%s" % input_nodes, "--output_node=%s" % output_nodes, "--runtime=%s" % runtime, - "--output_type=source", "--template=%s" % "mace/python/tools", "--model_tag=%s" % model_tag, "--input_shape=%s" % input_shapes, @@ -498,6 +497,9 @@ def gen_model_code(model_codegen_dir, "--embed_model_data=%s" % embed_model_data, "--winograd=%s" % fast_conv, "--obfuscate=%s" % obfuscate, + "--codegen_output=%s/model.cc" % model_codegen_dir, + "--pb_output=%s/%s.pb" % (model_output_dir, model_tag), + "--model_load_type=%s" % model_load_type, "--gpu_data_type=%s" % gpu_data_type, _out=process_output, _bg=True, @@ -549,6 +551,7 @@ def gen_random_input(model_output_dir, def update_mace_run_lib(model_output_dir, + model_load_type, model_tag, embed_model_data): mace_run_filepath = model_output_dir + "/mace_run" @@ -560,8 +563,9 @@ def update_mace_run_lib(model_output_dir, sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), model_output_dir) - sh.cp("-f", "mace/codegen/models/%s/%s.h" % (model_tag, model_tag), - model_output_dir) + if model_load_type == "source": + sh.cp("-f", "mace/codegen/models/%s/%s.h" % (model_tag, model_tag), + model_output_dir) def create_internal_storage_dir(serialno, phone_data_dir): @@ -579,6 +583,7 @@ def tuning_run(abi, output_nodes, input_shapes, output_shapes, + mace_model_dir, model_tag, device_type, running_round, @@ -603,6 +608,10 @@ def tuning_run(abi, str(out_of_range_check), omp_num_threads, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint)) if abi == "host": + if mace_model_dir: + mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) + else: + mace_model_path = "" p = subprocess.Popen( [ "env", @@ -623,6 +632,7 @@ def tuning_run(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_path, ], stderr=subprocess.PIPE, 
stdout=subprocess.PIPE) @@ -649,6 +659,14 @@ def tuning_run(abi, adb_push("mace/third_party/nnlib/libhexagon_controller.so", phone_data_dir, serialno) + if mace_model_dir: + mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag) + adb_push("%s/%s.pb" % (mace_model_dir, model_tag), + mace_model_path, + serialno) + else: + mace_model_path = "" + stdout_buff = [] process_output = make_output_processor(stdout_buff) adb_cmd = [ @@ -683,6 +701,7 @@ def tuning_run(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_path, ]) adb_cmd = ' '.join(adb_cmd) p = sh.adb( @@ -820,13 +839,17 @@ def validate_model(abi, print("Validation done!\n") -def build_production_code(abi): +def build_production_code(model_load_type, abi): bazel_build("//mace/codegen:generated_opencl", abi=abi) bazel_build("//mace/codegen:generated_tuning_params", abi=abi) if abi == 'host': - bazel_build( - "//mace/codegen:generated_models", - abi=abi) + if model_load_type == "source": + bazel_build( + "//mace/codegen:generated_models", + abi=abi) + else: + bazel_build("//mace/core:core", abi=abi) + bazel_build("//mace/ops:ops", abi=abi) def merge_libs(target_soc, @@ -834,6 +857,8 @@ def merge_libs(target_soc, project_name, libmace_output_dir, model_output_dirs, + mace_model_dirs_kv, + model_load_type, hexagon_mode, embed_model_data): print("* Merge mace lib") @@ -853,7 +878,20 @@ def merge_libs(target_soc, if hexagon_mode: sh.cp("-f", hexagon_lib_file, model_bin_dir) - sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) + if model_load_type == "source": + sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) + + for model_output_dir in model_output_dirs: + if not embed_model_data: + sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), + model_data_dir) + if model_load_type == "source": + sh.cp("-f", glob.glob("%s/*.h" % model_output_dir), + 
model_header_dir) + + for model_name in mace_model_dirs_kv: + sh.cp("-f", "%s/%s.pb" % (mace_model_dirs_kv[model_name], model_name), + model_data_dir) mri_stream = "" if abi == "host": @@ -865,12 +903,24 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") - mri_stream += ( - "addlib " - "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") + if model_load_type == "source": + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") + else: + mri_stream += ( + "addlib " + "bazel-bin/mace/core/libcore.pic.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/ops/libops.pic.lo\n") else: mri_stream += "create %s/libmace_%s.%s.a\n" % \ (model_bin_dir, project_name, target_soc) + if model_load_type == "source": + mri_stream += ( + "addlib " + "bazel-bin/mace/codegen/libgenerated_models.a\n") mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_opencl.a\n") @@ -880,9 +930,6 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_version.a\n") - mri_stream += ( - "addlib " - "bazel-bin/mace/codegen/libgenerated_models.a\n") mri_stream += ( "addlib " "bazel-bin/mace/core/libcore.a\n") @@ -895,16 +942,16 @@ def merge_libs(target_soc, mri_stream += ( "addlib " "bazel-bin/mace/utils/libutils_prod.a\n") + mri_stream += ( + "addlib " + "bazel-bin/mace/proto/libmace_cc.a\n") + mri_stream += ( + "addlib " + "bazel-bin/external/com_google_protobuf/libprotobuf_lite.a\n") mri_stream += ( "addlib " "bazel-bin/mace/ops/libops.lo\n") - for model_output_dir in model_output_dirs: - if not embed_model_data: - sh.cp("-f", glob.glob("%s/*.data" % model_output_dir), - model_data_dir) - sh.cp("-f", glob.glob("%s/*.h" % model_output_dir), model_header_dir) - mri_stream += "save\n" mri_stream += "end\n" @@ -971,6 +1018,7 @@ def benchmark_model(abi, vlog_level, embed_model_data, model_output_dir, + mace_model_dir, input_nodes, output_nodes, input_shapes, @@ 
-988,6 +1036,10 @@ def benchmark_model(abi, stdout_buff = [] process_output = make_output_processor(stdout_buff) if abi == "host": + if mace_model_dir: + mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) + else: + mace_model_path = "" p = subprocess.Popen( [ "env", @@ -1005,6 +1057,7 @@ def benchmark_model(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_path, ]) p.wait() else: @@ -1022,6 +1075,14 @@ def benchmark_model(abi, if not embed_model_data: adb_push("%s/%s.data" % (model_output_dir, model_tag), phone_data_dir, serialno) + if mace_model_dir: + mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag) + adb_push("%s/%s.pb" % (mace_model_dir, model_tag), + mace_model_path, + serialno) + else: + mace_model_path = "" + p = sh.adb( "-s", serialno, @@ -1045,6 +1106,7 @@ def benchmark_model(abi, "--cpu_affinity_policy=%s" % cpu_affinity_policy, "--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_path, _out=process_output, _bg=True, _err_to_out=True)