diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index 811b420ed27351422bae9f0b35981846dbdc5da3..b715869984d414234af5961a21cf4867bdb2601d 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -8,6 +8,7 @@ package( cc_library( name = "generated_models", srcs = glob(["models/*/*.cc"]), + hdrs = glob(["models/*/*.h"]), linkstatic = 1, deps = [ "//mace/core", @@ -33,7 +34,6 @@ cc_library( linkstatic = 1, ) - cc_library( name = "generated_version", srcs = ["version/version.cc"], diff --git a/mace/core/arg_helper.h b/mace/core/arg_helper.h index ab8e14b99bf9baa078dd37129a3a089d4fb6096f..296f66e50efdf38c2107f7b55b082fc400c7ee1a 100644 --- a/mace/core/arg_helper.h +++ b/mace/core/arg_helper.h @@ -10,6 +10,7 @@ #include #include "mace/public/mace.h" +#include "mace/public/mace_types.h" namespace mace { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 3e64e2b1a60c7c423d8e063f8f70caeda2232b77..49066049676935e289053aa2f05d03e5dcc14e84 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -2,410 +2,89 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/public/mace.h" +#include + #include "mace/core/net.h" #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" -#include "mace/core/runtime/cpu/cpu_runtime.h" #include "mace/core/types.h" +#include "mace/public/mace.h" namespace mace { -ConstTensor::ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const DataType data_type, - uint32_t node_id) - : name_(name), - data_(data), - data_size_(std::accumulate( - dims.begin(), dims.end(), 1, std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(data_type), - node_id_(node_id) {} - -ConstTensor::ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const int data_type, - uint32_t node_id) - : name_(name), - data_(data), - data_size_(std::accumulate( - dims.begin(), dims.end(), 1, std::multiplies())), - dims_(dims.begin(), dims.end()), - data_type_(static_cast(data_type)), - node_id_(node_id) {} +// Mace Tensor +struct MaceTensor::Impl { + std::vector shape; + std::shared_ptr data; +}; -const std::string &ConstTensor::name() const { return name_; } -const unsigned char *ConstTensor::data() const { return data_; } -int64_t ConstTensor::data_size() const { return data_size_; } -const std::vector &ConstTensor::dims() const { return dims_; } -DataType ConstTensor::data_type() const { return data_type_; } -uint32_t ConstTensor::node_id() const { return node_id_; } - -Argument::Argument() : has_bits_(0) {} - -void Argument::CopyFrom(const Argument &from) { - this->name_ = from.name(); - this->f_ = from.f(); - this->i_ = from.i(); - this->s_ = from.s(); - auto floats = from.floats(); - this->floats_.resize(floats.size()); - std::copy(floats.begin(), floats.end(), this->floats_.begin()); - auto ints = from.ints(); - this->ints_.resize(ints.size()); - std::copy(ints.begin(), ints.end(), this->ints_.begin()); - auto strings = from.floats(); - this->strings_.resize(strings.size()); - std::copy(floats.begin(), floats.end(), this->floats_.begin()); - - this->has_bits_ = from.has_bits_; -} -const std::string &Argument::name() const { return name_; } -void Argument::set_name(const std::string &value) { name_ = value; } -bool Argument::has_f() const { return (has_bits_ & 0x00000001u) != 0; } -void Argument::set_has_f() { has_bits_ |= 0x00000001u; } -float Argument::f() const { return f_; } -void Argument::set_f(float value) { - set_has_f(); - f_ = value; -} -bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; } -void Argument::set_has_i() { has_bits_ |= 0x00000002u; } -int64_t Argument::i() const { return i_; } -void Argument::set_i(int64_t value) { - set_has_i(); - i_ = value; -} -bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; } -void Argument::set_has_s() { has_bits_ |= 0x00000004u; } -std::string Argument::s() const { return s_; } -void Argument::set_s(const std::string &value) { - set_has_s(); - s_ = value; -} -const std::vector &Argument::floats() const { return floats_; } -void Argument::add_floats(float value) { floats_.push_back(value); } -void Argument::set_floats(const std::vector &value) { - floats_.resize(value.size()); - std::copy(value.begin(), value.end(), floats_.begin()); -} -const std::vector &Argument::ints() const { return ints_; } -void Argument::add_ints(int64_t value) { ints_.push_back(value); } -void Argument::set_ints(const std::vector &value) { - ints_.resize(value.size()); - std::copy(value.begin(), value.end(), ints_.begin()); -} -const std::vector &Argument::strings() const { return strings_; } -void Argument::add_strings(const ::std::string &value) { - strings_.push_back(value); -} -void Argument::set_strings(const std::vector &value) { - strings_.resize(value.size()); - std::copy(value.begin(), value.end(), strings_.begin()); +MaceTensor::MaceTensor(const std::vector &shape, + std::shared_ptr data) { + MACE_CHECK_NOTNULL(data.get()); + impl_ = std::unique_ptr(new MaceTensor::Impl()); + impl_->shape = shape; + impl_->data = data; } -// Node Input -NodeInput::NodeInput(int node_id, int output_port) - : node_id_(node_id), output_port_(output_port) {} -void NodeInput::CopyFrom(const NodeInput &from) { - node_id_ = from.node_id(); - output_port_ = from.output_port(); +MaceTensor::MaceTensor() { + impl_ = std::unique_ptr(new MaceTensor::Impl()); } -int NodeInput::node_id() const { return node_id_; } -void NodeInput::set_node_id(int node_id) { node_id_ = node_id; } -int NodeInput::output_port() const { return output_port_; } -void NodeInput::set_output_port(int output_port) { output_port_ = output_port; } -// OutputShape -OutputShape::OutputShape() {} -OutputShape::OutputShape(const std::vector &dims) - : dims_(dims.begin(), dims.end()) {} -void OutputShape::CopyFrom(const OutputShape &from) { - auto from_dims = from.dims(); - dims_.resize(from_dims.size()); - std::copy(from_dims.begin(), from_dims.end(), dims_.begin()); +MaceTensor::MaceTensor(const MaceTensor &other) { + impl_ = std::unique_ptr(new MaceTensor::Impl()); + impl_->shape = other.shape(); + impl_->data = other.data(); } -const std::vector &OutputShape::dims() const { return dims_; } - -// Operator Def -void OperatorDef::CopyFrom(const OperatorDef &from) { - name_ = from.name(); - type_ = from.type(); - auto from_input = from.input(); - input_.resize(from_input.size()); - std::copy(from_input.begin(), from_input.end(), input_.begin()); - auto from_output = from.output(); - output_.resize(from_output.size()); - std::copy(from_output.begin(), from_output.end(), output_.begin()); - auto from_arg = from.arg(); - arg_.resize(from_arg.size()); - for (int i = 0; i < from_arg.size(); ++i) { - arg_[i].CopyFrom(from_arg[i]); - } - auto from_output_shape = from.output_shape(); - output_shape_.resize(from_output_shape.size()); - for (int i = 0; i < from_output_shape.size(); ++i) { - output_shape_[i].CopyFrom(from_output_shape[i]); - } - auto from_data_type = from.output_type(); - output_type_.resize(from_data_type.size()); - std::copy(from_data_type.begin(), from_data_type.end(), output_type_.begin()); - - auto mem_ids = from.mem_id(); - mem_id_.resize(mem_ids.size()); - std::copy(mem_ids.begin(), mem_ids.end(), mem_id_.begin()); - - // nnlib - node_id_ = from.node_id(); - op_id_ = from.op_id(); - padding_ = from.padding(); - auto from_node_input = from.node_input(); - node_input_.resize(from_node_input.size()); - for (int i = 0; i < from_node_input.size(); ++i) { - node_input_[i].CopyFrom(from_node_input[i]); - } - auto from_out_max_byte_size = from.out_max_byte_size(); - out_max_byte_size_.resize(from_out_max_byte_size.size()); - std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(), - out_max_byte_size_.begin()); - - has_bits_ = from.has_bits_; +MaceTensor::MaceTensor(const MaceTensor &&other) { + impl_ = std::unique_ptr(new MaceTensor::Impl()); + impl_->shape = std::move(other.shape()); + impl_->data = std::move(other.data()); } -const std::string &OperatorDef::name() const { return name_; } -void OperatorDef::set_name(const std::string &name_) { - set_has_name(); - OperatorDef::name_ = name_; -} -bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } -void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; } -const std::string &OperatorDef::type() const { return type_; } -void OperatorDef::set_type(const std::string &type_) { - set_has_type(); - OperatorDef::type_ = type_; -} -bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; } -void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; } -const std::vector &OperatorDef::mem_id() const { return mem_id_; } -void OperatorDef::set_mem_id(const std::vector &value) { - mem_id_.resize(value.size()); - std::copy(value.begin(), value.end(), mem_id_.begin()); -} -uint32_t OperatorDef::node_id() const { return node_id_; } -void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; } -uint32_t OperatorDef::op_id() const { return op_id_; } -uint32_t OperatorDef::padding() const { return padding_; } -void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; } -const std::vector &OperatorDef::node_input() const { - return node_input_; -} -void OperatorDef::add_node_input(const NodeInput &value) { - node_input_.push_back(value); -} -const std::vector &OperatorDef::out_max_byte_size() const { - return out_max_byte_size_; -} -void OperatorDef::add_out_max_byte_size(int value) { - out_max_byte_size_.push_back(value); -} -const std::vector &OperatorDef::input() const { return input_; } -const std::string &OperatorDef::input(int index) const { - MACE_CHECK(0 <= index && index <= input_.size()); - return input_[index]; -} -std::string *OperatorDef::add_input() { - input_.push_back(""); - return &input_.back(); +MaceTensor &MaceTensor::operator=(const MaceTensor &other) { + impl_->shape = other.shape(); + impl_->data = other.data(); + return *this; } -void OperatorDef::add_input(const ::std::string &value) { - input_.push_back(value); -} -void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); } -void OperatorDef::set_input(const std::vector &value) { - input_.resize(value.size()); - std::copy(value.begin(), value.end(), input_.begin()); -} -const std::vector &OperatorDef::output() const { return output_; } -const std::string &OperatorDef::output(int index) const { - MACE_CHECK(0 <= index && index <= output_.size()); - return output_[index]; -} -std::string *OperatorDef::add_output() { - output_.push_back(""); - return &output_.back(); -} -void OperatorDef::add_output(const ::std::string &value) { - output_.push_back(value); -} -void OperatorDef::add_output(::std::string &&value) { - output_.push_back(value); -} -void OperatorDef::set_output(const std::vector &value) { - output_.resize(value.size()); - std::copy(value.begin(), value.end(), output_.begin()); -} -const std::vector &OperatorDef::arg() const { return arg_; } -Argument *OperatorDef::add_arg() { - arg_.emplace_back(Argument()); - return &arg_.back(); -} -const std::vector &OperatorDef::output_shape() const { - return output_shape_; -} -void OperatorDef::add_output_shape(const OutputShape &value) { - output_shape_.push_back(value); -} -const std::vector &OperatorDef::output_type() const { - return output_type_; -} -void OperatorDef::set_output_type(const std::vector &value) { - output_type_.resize(value.size()); - std::copy(value.begin(), value.end(), output_type_.begin()); -} - -// MemoryBlock -MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) - : mem_id_(mem_id), x_(x), y_(y) {} - -int MemoryBlock::mem_id() const { return mem_id_; } -uint32_t MemoryBlock::x() const { return x_; } -uint32_t MemoryBlock::y() const { return y_; } -// MemoryArena -const std::vector &MemoryArena::mem_block() const { - return mem_block_; +MaceTensor &MaceTensor::operator=(const MaceTensor &&other) { + impl_->shape = std::move(other.shape()); + impl_->data = std::move(other.data()); + return *this; } -std::vector &MemoryArena::mutable_mem_block() { - return mem_block_; -} -int MemoryArena::mem_block_size() const { return mem_block_.size(); } - -// InputInfo -const std::string &InputInfo::name() const { return name_; } -int32_t InputInfo::node_id() const { return node_id_; } -int32_t InputInfo::max_byte_size() const { return max_byte_size_; } -DataType InputInfo::data_type() const { return data_type_; } -const std::vector &InputInfo::dims() const { return dims_; } - -// OutputInfo -const std::string &OutputInfo::name() const { return name_; } -int32_t OutputInfo::node_id() const { return node_id_; } -int32_t OutputInfo::max_byte_size() const { return max_byte_size_; } -DataType OutputInfo::data_type() const { return data_type_; } -void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; } -const std::vector &OutputInfo::dims() const { return dims_; } -void OutputInfo::set_dims(const std::vector &dims) { dims_ = dims; } - -// NetDef -NetDef::NetDef() : has_bits_(0) {} - -const std::string &NetDef::name() const { return name_; } -void NetDef::set_name(const std::string &value) { - set_has_name(); - name_ = value; -} -bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } -void NetDef::set_has_name() { has_bits_ |= 0x00000001u; } -const std::string &NetDef::version() const { return version_; } -void NetDef::set_version(const std::string &value) { - set_has_version(); - version_ = value; -} -bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; } -void NetDef::set_has_version() { has_bits_ |= 0x00000002u; } -const std::vector &NetDef::op() const { return op_; } -OperatorDef *NetDef::add_op() { - op_.emplace_back(OperatorDef()); - return &op_.back(); -} -std::vector &NetDef::mutable_op() { return op_; } -const std::vector &NetDef::arg() const { return arg_; } -Argument *NetDef::add_arg() { - arg_.emplace_back(Argument()); - return &arg_.back(); -} -std::vector &NetDef::mutable_arg() { return arg_; } -const std::vector &NetDef::tensors() const { return tensors_; } -std::vector &NetDef::mutable_tensors() { return tensors_; } -const MemoryArena &NetDef::mem_arena() const { return mem_arena_; } -MemoryArena &NetDef::mutable_mem_arena() { - set_has_mem_arena(); - return mem_arena_; -} -bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; } -void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; } -const std::vector &NetDef::input_info() const { return input_info_; } -const std::vector &NetDef::output_info() const { - return output_info_; -} -std::vector &NetDef::mutable_output_info() { return output_info_; } -int NetDef::op_size() const { return op_.size(); } +MaceTensor::~MaceTensor() = default; -const OperatorDef &NetDef::op(const int idx) const { - MACE_CHECK(0 <= idx && idx < op_size()); - return op_[idx]; -} +const std::vector &MaceTensor::shape() const { return impl_->shape; } -void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint, - GPUPriorityHint gpu_priority_hint) { - VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint - << ", gpu_priority_hint: " << gpu_priority_hint; - OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); -} +const std::shared_ptr MaceTensor::data() const { return impl_->data; } -void ConfigOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option) { - VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads - << ", cpu_power_option: " << power_option; - SetOmpThreadsAndAffinity(omp_num_threads, power_option); -} +std::shared_ptr MaceTensor::data() { return impl_->data; } // Mace Engine -MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) - : op_registry_(new OperatorRegistry()), - device_type_(device_type), - ws_(new Workspace()), - net_(nullptr), - hexagon_controller_(nullptr) { - ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), - DT_FLOAT); - ws_->CreateTensor("mace_output_node:0", GetDeviceAllocator(device_type_), - DT_FLOAT); - if (device_type == HEXAGON) { - hexagon_controller_.reset(new HexagonControlWrapper()); - MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); - MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); - hexagon_controller_->SetDebugLevel( - static_cast(mace::logging::LogMessage::MinVLogLevel())); - int dsp_mode = - ArgumentHelper::GetSingleArgument(*net_def, "dsp_mode", 0); - hexagon_controller_->SetGraphMode(dsp_mode); - MACE_CHECK(hexagon_controller_->SetupGraph(*net_def), - "hexagon setup graph error"); - if (VLOG_IS_ON(2)) { - hexagon_controller_->PrintGraph(); - } - } else { - ws_->LoadModelTensor(*net_def, device_type); - - // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, - NetMode::INIT); - if (!net->Run()) { - LOG(FATAL) << "Net init run failed"; - } - net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); - } -} - -extern const char *MaceGitVersion(); - -MaceEngine::MaceEngine(const NetDef *net_def, +class MaceEngine::Impl { + public: + explicit Impl(const NetDef *net_def, + DeviceType device_type, + const std::vector &input_nodes, + const std::vector &output_nodes); + ~Impl(); + + MaceStatus Run(const std::map &inputs, + std::map *outputs, + RunMetadata *run_metadata); + + private: + std::shared_ptr op_registry_; + DeviceType device_type_; + std::unique_ptr ws_; + std::unique_ptr net_; + std::unique_ptr hexagon_controller_; + + DISABLE_COPY_AND_ASSIGN(Impl); +}; + +MaceEngine::Impl::Impl(const NetDef *net_def, DeviceType device_type, const std::vector &input_nodes, const std::vector &output_nodes) @@ -414,7 +93,7 @@ MaceEngine::MaceEngine(const NetDef *net_def, ws_(new Workspace()), net_(nullptr), hexagon_controller_(nullptr) { - LOG(INFO) << "MACE GIT VERSION: " << MaceGitVersion(); + LOG(INFO) << "MACE version: " << MaceVersion(); for (auto input_name : input_nodes) { ws_->CreateTensor(MakeString("mace_input_node_", input_name, ":0"), GetDeviceAllocator(device_type_), DT_FLOAT); @@ -449,7 +128,8 @@ MaceEngine::MaceEngine(const NetDef *net_def, net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); } } -MaceEngine::~MaceEngine() { + +MaceEngine::Impl::~Impl() { if (device_type_ == HEXAGON) { if (VLOG_IS_ON(2)) { hexagon_controller_->GetPerfInfo(); @@ -460,81 +140,69 @@ MaceEngine::~MaceEngine() { } } -bool MaceEngine::Run(const float *input, - const std::vector &input_shape, - float *output) { - return Run(input, input_shape, output, nullptr); -} - -bool MaceEngine::Run(const float *input, - const std::vector &input_shape, - float *output, - RunMetadata *run_metadata) { - MACE_CHECK(output != nullptr, "output ptr cannot be NULL"); - Tensor *input_tensor = ws_->GetTensor("mace_input_node:0"); - Tensor *output_tensor = ws_->GetTensor("mace_output_node:0"); - input_tensor->Resize(input_shape); - { - Tensor::MappingGuard input_guard(input_tensor); - float *input_data = input_tensor->mutable_data(); - memcpy(input_data, input, input_tensor->size() * sizeof(float)); - } - if (device_type_ == HEXAGON) { - hexagon_controller_->ExecuteGraph(*input_tensor, output_tensor); - } else { - if (!net_->Run(run_metadata)) { - LOG(FATAL) << "Net run failed"; - } - } - // save output - if (output_tensor != nullptr) { - Tensor::MappingGuard output_guard(output_tensor); - auto shape = output_tensor->shape(); - int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1, - std::multiplies()); - std::memcpy(output, output_tensor->data(), - output_size * sizeof(float)); - return true; - } else { - return false; - } -} - -bool MaceEngine::Run(const std::vector &inputs, - std::map &outputs, - RunMetadata *run_metadata) { - MACE_CHECK(device_type_ != HEXAGON, +MaceStatus MaceEngine::Impl::Run( + const std::map &inputs, + std::map *outputs, + RunMetadata *run_metadata) { + MACE_CHECK_NOTNULL(outputs); + MACE_CHECK(device_type_ != HEXAGON || outputs->size() <= 1, "HEXAGON not supports multiple outputs now"); - for (auto input : inputs) { + for (auto &input : inputs) { Tensor *input_tensor = - ws_->GetTensor(MakeString("mace_input_node_", input.name, ":0")); - input_tensor->Resize(input.shape); + ws_->GetTensor(MakeString("mace_input_node_", input.first, ":0")); + input_tensor->Resize(input.second.shape()); { Tensor::MappingGuard input_guard(input_tensor); float *input_data = input_tensor->mutable_data(); - memcpy(input_data, input.data, input_tensor->size() * sizeof(float)); + memcpy(input_data, input.second.data().get(), + input_tensor->size() * sizeof(float)); } } if (!net_->Run(run_metadata)) { LOG(FATAL) << "Net run failed"; } - for (auto output : outputs) { + for (auto &output : *outputs) { Tensor *output_tensor = ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); // save output - if (output_tensor != nullptr && output.second != nullptr) { + if (output_tensor != nullptr && output.second.data() != nullptr) { Tensor::MappingGuard output_guard(output_tensor); auto shape = output_tensor->shape(); int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); MACE_CHECK(!shape.empty()) << "Output's shape must greater than 0"; - std::memcpy(output.second, output_tensor->data(), + MACE_CHECK(shape == output.second.shape()) + << "Output shape mispatch: " + << MakeString(output.second.shape()) + << " != " << MakeString(shape); + std::memcpy(output.second.data().get(), output_tensor->data(), output_size * sizeof(float)); } else { - return false; + return MACE_INVALID_ARGS; } } - return true; + return MACE_SUCCESS; +} + +MaceEngine::MaceEngine(const NetDef *net_def, + DeviceType device_type, + const std::vector &input_nodes, + const std::vector &output_nodes) { + impl_ = std::unique_ptr( + new MaceEngine::Impl(net_def, device_type, input_nodes, output_nodes)); +} + +MaceEngine::~MaceEngine() = default; + +MaceStatus MaceEngine::Run(const std::map &inputs, + std::map *outputs, + RunMetadata *run_metadata) { + return impl_->Run(inputs, outputs, run_metadata); +} + +MaceStatus MaceEngine::Run(const std::map &inputs, + std::map *outputs) { + return impl_->Run(inputs, outputs, nullptr); } } // namespace mace diff --git a/mace/core/mace_runtime.cc b/mace/core/mace_runtime.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6d08184d9ce88c9095ef078862e6f8999d99c0c --- /dev/null +++ b/mace/core/mace_runtime.cc @@ -0,0 +1,25 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include "mace/public/mace_runtime.h" +#include "mace/core/runtime/cpu/cpu_runtime.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" + +namespace mace { + +void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint, + GPUPriorityHint gpu_priority_hint) { + VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint + << ", gpu_priority_hint: " << gpu_priority_hint; + OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); +} + +void ConfigOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option) { + VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads + << ", cpu_power_option: " << power_option; + SetOmpThreadsAndAffinity(omp_num_threads, power_option); +} + +}; // namespace mace diff --git a/mace/core/mace_types.cc b/mace/core/mace_types.cc new file mode 100644 index 0000000000000000000000000000000000000000..04f968e6a8a9066c4d76fcf862eb05b40a7746cf --- /dev/null +++ b/mace/core/mace_types.cc @@ -0,0 +1,353 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include +#include + +#include "mace/public/mace_types.h" +#include "mace/utils/logging.h" + +namespace mace { + +ConstTensor::ConstTensor(const std::string &name, + const unsigned char *data, + const std::vector &dims, + const DataType data_type, + uint32_t node_id) + : name_(name), + data_(data), + data_size_(std::accumulate( + dims.begin(), dims.end(), 1, std::multiplies())), + dims_(dims.begin(), dims.end()), + data_type_(data_type), + node_id_(node_id) {} + +ConstTensor::ConstTensor(const std::string &name, + const unsigned char *data, + const std::vector &dims, + const int data_type, + uint32_t node_id) + : name_(name), + data_(data), + data_size_(std::accumulate( + dims.begin(), dims.end(), 1, std::multiplies())), + dims_(dims.begin(), dims.end()), + data_type_(static_cast(data_type)), + node_id_(node_id) {} + +const std::string &ConstTensor::name() const { return name_; } +const unsigned char *ConstTensor::data() const { return data_; } +int64_t ConstTensor::data_size() const { return data_size_; } +const std::vector &ConstTensor::dims() const { return dims_; } +DataType ConstTensor::data_type() const { return data_type_; } +uint32_t ConstTensor::node_id() const { return node_id_; } + +Argument::Argument() : has_bits_(0) {} + +void Argument::CopyFrom(const Argument &from) { + this->name_ = from.name(); + this->f_ = from.f(); + this->i_ = from.i(); + this->s_ = from.s(); + auto floats = from.floats(); + this->floats_.resize(floats.size()); + std::copy(floats.begin(), floats.end(), this->floats_.begin()); + auto ints = from.ints(); + this->ints_.resize(ints.size()); + std::copy(ints.begin(), ints.end(), this->ints_.begin()); + auto strings = from.floats(); + this->strings_.resize(strings.size()); + std::copy(floats.begin(), floats.end(), this->floats_.begin()); + + this->has_bits_ = from.has_bits_; +} +const std::string &Argument::name() const { return name_; } +void Argument::set_name(const std::string &value) { name_ = value; } +bool Argument::has_f() const { return (has_bits_ & 0x00000001u) != 0; } +void Argument::set_has_f() { has_bits_ |= 0x00000001u; } +float Argument::f() const { return f_; } +void Argument::set_f(float value) { + set_has_f(); + f_ = value; +} +bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; } +void Argument::set_has_i() { has_bits_ |= 0x00000002u; } +int64_t Argument::i() const { return i_; } +void Argument::set_i(int64_t value) { + set_has_i(); + i_ = value; +} +bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; } +void Argument::set_has_s() { has_bits_ |= 0x00000004u; } +std::string Argument::s() const { return s_; } +void Argument::set_s(const std::string &value) { + set_has_s(); + s_ = value; +} +const std::vector &Argument::floats() const { return floats_; } +void Argument::add_floats(float value) { floats_.push_back(value); } +void Argument::set_floats(const std::vector &value) { + floats_.resize(value.size()); + std::copy(value.begin(), value.end(), floats_.begin()); +} +const std::vector &Argument::ints() const { return ints_; } +void Argument::add_ints(int64_t value) { ints_.push_back(value); } +void Argument::set_ints(const std::vector &value) { + ints_.resize(value.size()); + std::copy(value.begin(), value.end(), ints_.begin()); +} +const std::vector &Argument::strings() const { return strings_; } +void Argument::add_strings(const ::std::string &value) { + strings_.push_back(value); +} +void Argument::set_strings(const std::vector &value) { + strings_.resize(value.size()); + std::copy(value.begin(), value.end(), strings_.begin()); +} + +// Node Input +NodeInput::NodeInput(int node_id, int output_port) + : node_id_(node_id), output_port_(output_port) {} +void NodeInput::CopyFrom(const NodeInput &from) { + node_id_ = from.node_id(); + output_port_ = from.output_port(); +} +int NodeInput::node_id() const { return node_id_; } +void NodeInput::set_node_id(int node_id) { node_id_ = node_id; } +int NodeInput::output_port() const { return output_port_; } +void NodeInput::set_output_port(int output_port) { output_port_ = output_port; } + +// OutputShape +OutputShape::OutputShape() {} +OutputShape::OutputShape(const std::vector &dims) + : dims_(dims.begin(), dims.end()) {} +void OutputShape::CopyFrom(const OutputShape &from) { + auto from_dims = from.dims(); + dims_.resize(from_dims.size()); + std::copy(from_dims.begin(), from_dims.end(), dims_.begin()); +} +const std::vector &OutputShape::dims() const { return dims_; } + +// Operator Def +void OperatorDef::CopyFrom(const OperatorDef &from) { + name_ = from.name(); + type_ = from.type(); + + auto from_input = from.input(); + input_.resize(from_input.size()); + std::copy(from_input.begin(), from_input.end(), input_.begin()); + auto from_output = from.output(); + output_.resize(from_output.size()); + std::copy(from_output.begin(), from_output.end(), output_.begin()); + auto from_arg = from.arg(); + arg_.resize(from_arg.size()); + for (int i = 0; i < from_arg.size(); ++i) { + arg_[i].CopyFrom(from_arg[i]); + } + auto from_output_shape = from.output_shape(); + output_shape_.resize(from_output_shape.size()); + for (int i = 0; i < from_output_shape.size(); ++i) { + output_shape_[i].CopyFrom(from_output_shape[i]); + } + auto from_data_type = from.output_type(); + output_type_.resize(from_data_type.size()); + std::copy(from_data_type.begin(), from_data_type.end(), output_type_.begin()); + + auto mem_ids = from.mem_id(); + mem_id_.resize(mem_ids.size()); + std::copy(mem_ids.begin(), mem_ids.end(), mem_id_.begin()); + + // nnlib + node_id_ = from.node_id(); + op_id_ = from.op_id(); + padding_ = from.padding(); + auto from_node_input = from.node_input(); + node_input_.resize(from_node_input.size()); + for (int i = 0; i < from_node_input.size(); ++i) { + node_input_[i].CopyFrom(from_node_input[i]); + } + auto from_out_max_byte_size = from.out_max_byte_size(); + out_max_byte_size_.resize(from_out_max_byte_size.size()); + std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(), + out_max_byte_size_.begin()); + + has_bits_ = from.has_bits_; +} + +const std::string &OperatorDef::name() const { return name_; } +void OperatorDef::set_name(const std::string &name_) { + set_has_name(); + OperatorDef::name_ = name_; +} +bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } +void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; } +const std::string &OperatorDef::type() const { return type_; } +void OperatorDef::set_type(const std::string &type_) { + set_has_type(); + OperatorDef::type_ = type_; +} +bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; } +void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; } +const std::vector &OperatorDef::mem_id() const { return mem_id_; } +void OperatorDef::set_mem_id(const std::vector &value) { + mem_id_.resize(value.size()); + std::copy(value.begin(), value.end(), mem_id_.begin()); +} +uint32_t OperatorDef::node_id() const { return node_id_; } +void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; } +uint32_t OperatorDef::op_id() const { return op_id_; } +uint32_t OperatorDef::padding() const { return padding_; } +void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; } +const std::vector &OperatorDef::node_input() const { + return node_input_; +} +void OperatorDef::add_node_input(const NodeInput &value) { + node_input_.push_back(value); +} +const std::vector &OperatorDef::out_max_byte_size() const { + return out_max_byte_size_; +} +void OperatorDef::add_out_max_byte_size(int value) { + out_max_byte_size_.push_back(value); +} +const std::vector &OperatorDef::input() const { return input_; } +const std::string &OperatorDef::input(int index) const { + MACE_CHECK(0 <= index && index <= input_.size()); + return input_[index]; +} +std::string *OperatorDef::add_input() { + input_.push_back(""); + return &input_.back(); +} +void OperatorDef::add_input(const ::std::string &value) { + input_.push_back(value); +} +void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); } +void OperatorDef::set_input(const std::vector &value) { + input_.resize(value.size()); + std::copy(value.begin(), value.end(), input_.begin()); +} +const std::vector &OperatorDef::output() const { return output_; } +const std::string &OperatorDef::output(int index) const { + MACE_CHECK(0 <= index && index <= output_.size()); + return output_[index]; +} +std::string *OperatorDef::add_output() { + output_.push_back(""); + return &output_.back(); +} +void OperatorDef::add_output(const ::std::string &value) { + output_.push_back(value); +} +void OperatorDef::add_output(::std::string &&value) { + output_.push_back(value); +} +void OperatorDef::set_output(const std::vector &value) { + output_.resize(value.size()); + std::copy(value.begin(), value.end(), output_.begin()); +} +const std::vector &OperatorDef::arg() const { return arg_; } +Argument *OperatorDef::add_arg() { + arg_.emplace_back(Argument()); + return &arg_.back(); +} +const std::vector &OperatorDef::output_shape() const { + return output_shape_; +} +void OperatorDef::add_output_shape(const OutputShape &value) { + output_shape_.push_back(value); +} +const std::vector &OperatorDef::output_type() const { + return output_type_; +} +void OperatorDef::set_output_type(const std::vector &value) { + output_type_.resize(value.size()); + std::copy(value.begin(), value.end(), output_type_.begin()); +} + +// MemoryBlock +MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) + : mem_id_(mem_id), x_(x), y_(y) {} + +int MemoryBlock::mem_id() const { return mem_id_; } +uint32_t MemoryBlock::x() const { return x_; } +uint32_t MemoryBlock::y() const { return y_; } + +// MemoryArena +const std::vector &MemoryArena::mem_block() const { + return mem_block_; +} +std::vector &MemoryArena::mutable_mem_block() { + return mem_block_; +} +int MemoryArena::mem_block_size() const { return mem_block_.size(); } + +// InputInfo +const std::string &InputInfo::name() const { return name_; } +int32_t InputInfo::node_id() const { return node_id_; } +int32_t InputInfo::max_byte_size() const { return max_byte_size_; } +DataType InputInfo::data_type() const { return data_type_; } +const std::vector &InputInfo::dims() const { return dims_; } + +// OutputInfo +const std::string &OutputInfo::name() const { return name_; } +int32_t OutputInfo::node_id() const { return node_id_; } +int32_t OutputInfo::max_byte_size() const { return max_byte_size_; } +DataType OutputInfo::data_type() const { return data_type_; } +void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; } +const std::vector &OutputInfo::dims() const { return dims_; } +void OutputInfo::set_dims(const std::vector &dims) { dims_ = dims; } + +// NetDef +NetDef::NetDef() : has_bits_(0) {} + +const std::string &NetDef::name() const { return name_; } +void NetDef::set_name(const std::string &value) { + set_has_name(); + name_ = value; +} +bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; } +void NetDef::set_has_name() { has_bits_ |= 0x00000001u; } +const std::string &NetDef::version() const { return version_; } +void NetDef::set_version(const std::string &value) { + set_has_version(); + version_ = value; +} +bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; } +void NetDef::set_has_version() { has_bits_ |= 0x00000002u; } +const std::vector &NetDef::op() const { return op_; } +OperatorDef *NetDef::add_op() { + op_.emplace_back(OperatorDef()); + return &op_.back(); +} +std::vector &NetDef::mutable_op() { return op_; } +const std::vector &NetDef::arg() const { return arg_; } +Argument *NetDef::add_arg() { + arg_.emplace_back(Argument()); + return &arg_.back(); +} +std::vector &NetDef::mutable_arg() { return arg_; } +const std::vector &NetDef::tensors() const { return tensors_; } +std::vector &NetDef::mutable_tensors() { return tensors_; } +const MemoryArena &NetDef::mem_arena() const { return mem_arena_; } +MemoryArena &NetDef::mutable_mem_arena() { + set_has_mem_arena(); + return mem_arena_; +} +bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; } +void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; } +const std::vector &NetDef::input_info() const { return input_info_; } +const std::vector &NetDef::output_info() const { + return output_info_; +} +std::vector &NetDef::mutable_output_info() { return output_info_; } + +int NetDef::op_size() const { return op_.size(); } + +const OperatorDef &NetDef::op(const int idx) const { + MACE_CHECK(0 <= idx && idx < op_size()); + return op_[idx]; +} + +}; // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h index 27afdadd3a3b4c8bfbbe613c36ca558025f7c606..3ca7cd167b1e13d46e6072cea74f19d87df88ced 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -16,6 +16,7 @@ #include "mace/core/tensor.h" #include "mace/core/workspace.h" #include "mace/public/mace.h" +#include "mace/public/mace_types.h" namespace mace { diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index 082673a6a5ea1931a5569b2cd6ecbd4868d97492..dbe19c4d6abe91e896df8de6dab251659b984a06 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -6,7 +6,7 @@ #ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ #define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ -#include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" namespace mace { diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 8a3ce06abb63f078efa89ca43b242e46a13e5a3e..414fa7ed91fa205cbbb5b3d3b06d6d7c91d59fcf 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -14,6 +14,7 @@ #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_wrapper.h" +#include "mace/public/mace_runtime.h" #include "mace/utils/timer.h" namespace mace { diff --git a/mace/core/types.h b/mace/core/types.h index e7a078f625fbaf869cdfbae50dcaf0be7b3b9054..f038d5be034a3e9140bea2cbcdcde174c9f9918b 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -8,7 +8,7 @@ #include #include -#include "mace/public/mace.h" +#include "mace/public/mace_types.h" #include "include/half.hpp" namespace mace { diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index 1bfb79273821884462fc529909521c3e42e590d6..a05782baf44a7caac0c95ec0542083670f7622e8 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -23,9 +23,11 @@ #include "gflags/gflags.h" #include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" #include "mace/utils/env_time.h" #include "mace/utils/logging.h" +// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead namespace mace { namespace MACE_MODEL_TAG { @@ -145,19 +147,26 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) { return curr; } -DEFINE_string(input_node, "input_node0,input_node1", +DEFINE_string(input_node, + "input_node0,input_node1", "input nodes, separated by comma"); -DEFINE_string(input_shape, "1,224,224,3:1,1,1,10", +DEFINE_string(input_shape, + "1,224,224,3:1,1,1,10", "input shapes, separated by colon and comma"); -DEFINE_string(output_node, "output_node0,output_node1", +DEFINE_string(output_node, + "output_node0,output_node1", "output nodes, separated by comma"); -DEFINE_string(output_shape, "1,224,224,2:1,1,1,10", +DEFINE_string(output_shape, + "1,224,224,2:1,1,1,10", "output shapes, separated by colon and comma"); -DEFINE_string(input_file, "", +DEFINE_string(input_file, + "", "input file name | input file prefix for multiple inputs."); -DEFINE_string(output_file, "", +DEFINE_string(output_file, + "", "output file name | output file prefix for multiple outputs"); -DEFINE_string(model_data_file, "", +DEFINE_string(model_data_file, + "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON"); DEFINE_int32(round, 1, "round"); @@ -166,115 +175,14 @@ DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); -DEFINE_int32(cpu_power_option, 0, +DEFINE_int32(cpu_power_option, + 0, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); -bool SingleInputAndOutput(const std::vector &input_shape, - const std::vector &output_shape) { - // load model - int64_t t0 = NowMicros(); - const unsigned char *model_data = - mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); - NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); - int64_t t1 = NowMicros(); - LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us"; - int64_t init_micros = t1 - t0; - - DeviceType device_type = ParseDeviceType(FLAGS_device); - LOG(INFO) << "Runing with device type: " << device_type; - - // config runtime - if (device_type == DeviceType::OPENCL) { - mace::ConfigOpenCLRuntime( - static_cast(FLAGS_gpu_perf_hint), - static_cast(FLAGS_gpu_priority_hint)); - } else if (device_type == DeviceType::CPU) { - mace::ConfigOmpThreadsAndAffinity( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_power_option)); - } - - // Init model - LOG(INFO) << "Run init"; - t0 = NowMicros(); - mace::MaceEngine engine(&net_def, device_type); - if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) { - mace::MACE_MODEL_TAG::UnloadModelData(model_data); - } - t1 = NowMicros(); - init_micros += t1 - t0; - LOG(INFO) << "Net init latency: " << t1 - t0 << " us"; - LOG(INFO) << "Total init latency: " << init_micros << " us"; - - // Allocate input and output - int64_t input_size = - std::accumulate(input_shape.begin(), input_shape.end(), 1, - std::multiplies()); - int64_t output_size = - std::accumulate(output_shape.begin(), output_shape.end(), 1, - std::multiplies()); - std::unique_ptr input_data(new float[input_size]); - std::unique_ptr output_data(new float[output_size]); - - // load input - std::ifstream in_file(FLAGS_input_file + "_" + FormatName(FLAGS_input_node), - std::ios::in | std::ios::binary); - if (in_file.is_open()) { - in_file.read(reinterpret_cast(input_data.get()), - input_size * sizeof(float)); - in_file.close(); - } else { - LOG(INFO) << "Open input file failed"; - return -1; - } - - LOG(INFO) << "Warm up run"; - t0 = NowMicros(); - engine.Run(input_data.get(), input_shape, output_data.get()); - t1 = NowMicros(); - LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us"; - - if (FLAGS_round > 0) { - LOG(INFO) << "Run model"; - t0 = NowMicros(); - struct mallinfo prev = mallinfo(); - for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(input_data.get(), input_shape, output_data.get()); - if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { - LOG(INFO) << "=== check malloc info change #" << i << " ==="; - prev = LogMallinfoChange(prev); - } - } - t1 = NowMicros(); - LOG(INFO) << "Average latency: " << (t1 - t0) / FLAGS_round << " us"; - } - - if (FLAGS_restart_round == 1) { - if (output_data != nullptr) { - std::string - output_name = FLAGS_output_file + "_" + FormatName(FLAGS_output_node); - std::ofstream out_file(output_name, std::ios::binary); - out_file.write((const char *) (output_data.get()), - output_size * sizeof(float)); - out_file.flush(); - out_file.close(); - LOG(INFO) << "Write output file " - << output_name - << " with size " << output_size - << " done."; - } else { - LOG(INFO) << "Output data is null"; - } - } - - return true; -} - -bool MultipleInputOrOutput( - const std::vector &input_names, - const std::vector> &input_shapes, - const std::vector &output_names, - const std::vector> &output_shapes) { +bool RunModel(const std::vector &input_names, + const std::vector> &input_shapes, + const std::vector &output_names, + const std::vector> &output_shapes) { // load model int64_t t0 = NowMicros(); const unsigned char *model_data = @@ -312,42 +220,42 @@ bool MultipleInputOrOutput( const size_t input_count = input_names.size(); const size_t output_count = output_names.size(); - std::vector input_infos(input_count); - std::map outputs; - std::vector> input_datas(input_count); + + std::map inputs; + std::map outputs; for (size_t i = 0; i < input_count; ++i) { // Allocate input and output int64_t input_size = std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1, std::multiplies()); - input_datas[i].reset(new float[input_size]); + auto buffer_in = std::shared_ptr(new float[input_size], + std::default_delete()); // load input std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), std::ios::in | std::ios::binary); if (in_file.is_open()) { - in_file.read(reinterpret_cast(input_datas[i].get()), + in_file.read(reinterpret_cast(buffer_in.get()), input_size * sizeof(float)); in_file.close(); } else { LOG(INFO) << "Open input file failed"; return -1; } - input_infos[i].name = input_names[i]; - input_infos[i].shape = input_shapes[i]; - input_infos[i].data = input_datas[i].get(); + inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in); } - std::vector> output_datas(output_count); + for (size_t i = 0; i < output_count; ++i) { int64_t output_size = std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, std::multiplies()); - output_datas[i].reset(new float[output_size]); - outputs[output_names[i]] = output_datas[i].get(); + auto buffer_out = std::shared_ptr(new float[output_size], + std::default_delete()); + outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out); } LOG(INFO) << "Warm up run"; t0 = NowMicros(); - engine.Run(input_infos, outputs); + engine.Run(inputs, &outputs); t1 = NowMicros(); LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us"; @@ -356,7 +264,7 @@ bool MultipleInputOrOutput( t0 = NowMicros(); struct mallinfo prev = mallinfo(); for (int i = 0; i < FLAGS_round; ++i) { - engine.Run(input_infos, outputs); + engine.Run(inputs, &outputs); if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { LOG(INFO) << "=== check malloc info change #" << i << " ==="; prev = LogMallinfoChange(prev); @@ -367,20 +275,19 @@ bool MultipleInputOrOutput( } for (size_t i = 0; i < output_count; ++i) { - std::string output_name = FLAGS_output_file + "_" - + FormatName(output_names[i]); + std::string output_name = + FLAGS_output_file + "_" + FormatName(output_names[i]); std::ofstream out_file(output_name, std::ios::binary); int64_t output_size = std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, std::multiplies()); - out_file.write((const char *) outputs[output_names[i]], - output_size * sizeof(float)); + out_file.write( + reinterpret_cast(outputs[output_names[i]].data().get()), + output_size * sizeof(float)); out_file.flush(); out_file.close(); - LOG(INFO) << "Write output file " - << output_name - << " with size " << output_size - << " done."; + LOG(INFO) << "Write output file " << output_name << " with size " + << output_size << " done."; } return true; @@ -391,7 +298,6 @@ int Main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "mace git version: " << MaceGitVersion(); LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum(); LOG(INFO) << "input node: " << FLAGS_input_node; LOG(INFO) << "input shape: " << FLAGS_input_shape; @@ -431,14 +337,8 @@ int Main(int argc, char **argv) { #pragma omp parallel for for (int i = 0; i < FLAGS_restart_round; ++i) { VLOG(0) << "restart round " << i; - if (input_count == 1 && output_count == 1) { - ret = SingleInputAndOutput(input_shape_vec[0], output_shape_vec[0]); - } else { - ret = MultipleInputOrOutput(input_names, - input_shape_vec, - output_names, - output_shape_vec); - } + ret = + RunModel(input_names, input_shape_vec, output_names, output_shape_vec); } if (ret) { return 0; diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 7a5df69d9ec43953025ee2d1f208e5aac7332ce3..565b3d569934a3b2618ffbf48d278fa0041b8953 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -95,7 +95,12 @@ void BufferToImageFunctor::operator()( static_cast(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype()))); } - if (type == ARGUMENT) { + if (type == CONV2D_FILTER) { + b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); + b2f_kernel.setArg(idx++, static_cast(buffer->dim(1))); + b2f_kernel.setArg(idx++, static_cast(buffer->dim(2))); + b2f_kernel.setArg(idx++, static_cast(buffer->dim(3))); + } else if (type == ARGUMENT) { b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); } else if (type == WEIGHT_HEIGHT || type == WEIGHT_WIDTH) { b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); diff --git a/mace/kernels/opencl/cl/buffer_to_image.cl b/mace/kernels/opencl/cl/buffer_to_image.cl index 8e2f7184d63a3bed64d47aaaf66cc3b01b62943d..a5d9f289efc7266feb77c7c868b51cfe4b25fb28 100644 --- a/mace/kernels/opencl/cl/buffer_to_image.cl +++ b/mace/kernels/opencl/cl/buffer_to_image.cl @@ -2,6 +2,7 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, oc, ic */ __private const int input_offset, + __private const int filter_h, __private const int filter_w, __private const int out_channel, __private const int in_channel, @@ -22,16 +23,18 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o } #endif - const int out_channel_idx = h * 4; - const int rounded_in_channel = ((in_channel + 3) / 4) * 4; - const int hw_idx = w / rounded_in_channel; - const int in_channel_idx = w % rounded_in_channel; + const int in_channel_idx = w; + const int hw_size = filter_w * filter_h; + const int out_channel_idx = h / hw_size * 4; + const int hw_idx = h % hw_size; const int h_idx = hw_idx / filter_w; const int w_idx = hw_idx % filter_w; - const int offset = input_offset + ((h_idx * filter_w + w_idx) * out_channel + out_channel_idx) * in_channel - + in_channel_idx; + const int offset = input_offset + + ((h_idx * filter_w + w_idx) * out_channel + + out_channel_idx) * in_channel + + in_channel_idx; - VEC_DATA_TYPE(DATA_TYPE, 4) values = 0; + DATA_TYPE4 values = 0; if (out_channel_idx < out_channel) { const int size = out_channel - out_channel_idx; if (size < 4) { @@ -52,10 +55,11 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o } int2 coord = (int2)(w, h); - CMD_TYPE(write_image, CMD_DATA_TYPE)(output, coord, values); + WRITE_IMAGET(output, coord, values); } __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic */ + __private const int filter_h, __private const int filter_w, __private const int out_channel, __private const int in_channel, @@ -76,18 +80,19 @@ __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic } #endif - const int out_channel_idx = h * 4; - const int rounded_in_channel = ((in_channel + 3) / 4) * 4; - const int hw_idx = w / rounded_in_channel; - const int in_channel_idx = w % rounded_in_channel; + const int in_channel_idx = w; + const int hw_size = filter_w * filter_h; + const int out_channel_idx = h / hw_size * 4; + const int hw_idx = h % hw_size; const int h_idx = hw_idx / filter_w; const int w_idx = hw_idx % filter_w; - const int offset = ((h_idx * filter_w + w_idx) * out_channel + out_channel_idx) * in_channel - + in_channel_idx; + const int offset = ((h_idx * filter_w + w_idx) * out_channel + + out_channel_idx) * in_channel + + in_channel_idx; if (out_channel_idx < out_channel) { int2 coord = (int2)(w, h); - VEC_DATA_TYPE(DATA_TYPE, 4) values = CMD_TYPE(read_image, CMD_DATA_TYPE)(input, SAMPLER, coord); + DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord); const int size = (out_channel - out_channel_idx); if (size < 4) { switch (size) { @@ -200,7 +205,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */ + channel_idx; const int size = channels - channel_idx; - VEC_DATA_TYPE(DATA_TYPE, 4) values = 0; + DATA_TYPE4 values = 0; if (size < 4) { switch(size) { case 3: @@ -214,7 +219,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */ values = vload4(0, input + offset); } int2 coord = (int2)(w, h); - CMD_TYPE(write_image, CMD_DATA_TYPE)(output, coord, values); + WRITE_IMAGET(output, coord, values); } __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */ @@ -246,7 +251,7 @@ __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */ + channel_idx; int2 coord = (int2)(w, h); - VEC_DATA_TYPE(DATA_TYPE, 4) values = CMD_TYPE(read_image, CMD_DATA_TYPE)(input, SAMPLER, coord); + DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord); const int size = channels - channel_idx; if (size < 4) { switch (size) { @@ -286,7 +291,7 @@ __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */ const int size = count - w * 4; - VEC_DATA_TYPE(DATA_TYPE, 4) values = 0; + DATA_TYPE4 values = 0; if (size < 4) { switch(size) { case 3: @@ -300,7 +305,7 @@ __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */ values = vload4(0, input + offset); } int2 coord = (int2)(w, h); - CMD_TYPE(write_image, CMD_DATA_TYPE)(output, coord, values); + WRITE_IMAGET(output, coord, values); } __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */ @@ -325,7 +330,7 @@ __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */ const int offset = w * 4; int2 coord = (int2)(w, h); - VEC_DATA_TYPE(DATA_TYPE, 4) values = CMD_TYPE(read_image, CMD_DATA_TYPE)(input, SAMPLER, coord); + DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord); const int size = count - offset; if (size < 4) { switch (size) { diff --git a/mace/kernels/opencl/cl/conv_2d.cl b/mace/kernels/opencl/cl/conv_2d.cl index f85bf1080dae99432d1786fd9828dcf32c3b6d37..8fa23f02db62c5c43469e7c4f5095830ebeeed1e 100644 --- a/mace/kernels/opencl/cl/conv_2d.cl +++ b/mace/kernels/opencl/cl/conv_2d.cl @@ -1,7 +1,7 @@ #include __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ - __read_only image2d_t filter, /* cout%4 * cin * kh * kw, cout/4 */ + __read_only image2d_t filter, /* cout%4 * cin, kh * kw * cout/4 */ #ifdef BIAS __read_only image2d_t bias, /* cout%4 * cout/4 */ #endif @@ -41,8 +41,6 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ const int out_w_blks = get_global_size(1); #endif - const int rounded_in_ch = in_ch_blks << 2; - #ifdef BIAS DATA_TYPE4 out0 = READ_IMAGET(bias, SAMPLER, (int2)(out_ch_blk, 0)); @@ -64,21 +62,21 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ const int height_idx = mad24((out_hb % out_height), stride, -padding_top); const int batch_idx = mul24((out_hb / out_height), in_height); - const int rounded_in_ch_x_filter_width = mul24(rounded_in_ch, filter_width); + const int filter_hw = mul24(filter_width, filter_height); DATA_TYPE4 in0, in1, in2, in3; DATA_TYPE4 weights0, weights1, weights2, weights3; for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) { const int in_idx = mul24(in_ch_blk, in_width); - int filter_x_part0 = in_ch_blk << 2; + int filter_x_idx = in_ch_blk << 2; + int filter_y_idx = mul24(out_ch_blk, filter_hw); for (short hb_idx = 0; hb_idx < filter_height; ++hb_idx) { - // TODO(heliangliang) optimize out these muls int in_hb_value = height_idx + mul24(hb_idx, dilation_h); in_hb_value = select(in_hb_value + batch_idx, -1, (in_hb_value < 0 || in_hb_value >= in_height)); - int filter_x_part1 = 0; +#pragma unroll for (short width_idx = 0; width_idx < filter_width; ++width_idx) { int in_width_value; #define READ_INPUT(i) \ @@ -96,11 +94,10 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ #undef READ_INPUT // int filter_idx = (hb_idx * filter_width + width_idx) * rounded_in_ch + (in_ch_blk << 2); - int filter_idx = filter_x_part0 + filter_x_part1; - weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 0, out_ch_blk)); - weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 1, out_ch_blk)); - weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 2, out_ch_blk)); - weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 3, out_ch_blk)); + weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx)); + weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx)); + weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx)); + weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 3, filter_y_idx)); out0 = mad(in0.x, weights0, out0); out0 = mad(in0.y, weights1, out0); @@ -123,9 +120,8 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ out3 = mad(in3.z, weights2, out3); out3 = mad(in3.w, weights3, out3); - filter_x_part1 += rounded_in_ch; + filter_y_idx += 1; } - filter_x_part0 += rounded_in_ch_x_filter_width; } } diff --git a/mace/kernels/opencl/cl/conv_2d_3x3.cl b/mace/kernels/opencl/cl/conv_2d_3x3.cl index 8f58255ab8100c6597bdb8bb701adbc8406e0537..8ce485b7f3f92e46d3d25a1a95bcc624564d1125 100644 --- a/mace/kernels/opencl/cl/conv_2d_3x3.cl +++ b/mace/kernels/opencl/cl/conv_2d_3x3.cl @@ -1,7 +1,7 @@ #include __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ - __read_only image2d_t filter, /* cout%4 * cin * kh * kw, cout/4 */ + __read_only image2d_t filter, /* cout%4 * cin , kh * kw * cout/4 */ #ifdef BIAS __read_only image2d_t bias, /* cout%4 * cout/4 */ #endif @@ -39,8 +39,6 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] const int out_w_blks = get_global_size(1); #endif - const int rounded_in_ch = in_ch_blks << 2; - #ifdef BIAS DATA_TYPE4 out0 = READ_IMAGET(bias, SAMPLER, (int2)(out_ch_blk, 0)); @@ -65,19 +63,18 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] const int height_idx = mad24((out_hb % out_height), stride, -padding_top); const int batch_idx = mul24((out_hb / out_height), in_height); - const int rounded_in_ch_x_3 = (rounded_in_ch << 1) + rounded_in_ch; DATA_TYPE4 in0, in1, in2, in3, in4; DATA_TYPE4 weights0, weights1, weights2, weights3; for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) { const int in_idx = mul24(in_ch_blk, in_width); - int filter_x_part0 = in_ch_blk << 2; + int filter_x_idx = in_ch_blk << 2; + int filter_y_idx = mul24(out_ch_blk, 9); int in_hb_idx = height_idx; for (short hb_idx = 0; hb_idx < 3; ++hb_idx) { int in_hb_value = select(in_hb_idx + batch_idx, -1, (in_hb_idx < 0 || in_hb_idx >= in_height)); - int filter_x_part1 = 0; int in_width_idx = 0; for (short width_idx = 0; width_idx < 3; ++width_idx) { int in_width_value; @@ -97,11 +94,10 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] #undef READ_INPUT // int filter_idx = (hb_idx * 3 + width_idx) * rounded_in_ch + (in_ch_blk << 2); - int filter_idx = filter_x_part0 + filter_x_part1; - weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 0, out_ch_blk)); - weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 1, out_ch_blk)); - weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 2, out_ch_blk)); - weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_idx + 3, out_ch_blk)); + weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx)); + weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx)); + weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx)); + weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 3, filter_y_idx)); out0 = mad(in0.x, weights0, out0); out0 = mad(in0.y, weights1, out0); @@ -129,10 +125,9 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] out4 = mad(in4.z, weights2, out4); out4 = mad(in4.w, weights3, out4); - filter_x_part1 += rounded_in_ch; in_width_idx += dilation_w; + filter_y_idx += 1; } - filter_x_part0 += rounded_in_ch_x_3; in_hb_idx += dilation_h; } } diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index ba788a26977750ae69d37f90e6661e6612cdcf08..b8b8d6a3cc51de74c9cab7a8f85f86f61d218abd 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -23,13 +23,13 @@ void CalInOutputImageShape(const std::vector &shape, /* NHWC */ (*image_shape)[1] = shape[0] * shape[1]; } -// [RoundUp<4>(Ic) * H * W, (Oc + 3) / 4] +// [RoundUp<4>(Ic), H * W * (Oc + 3) / 4] void CalConv2dFilterImageShape(const std::vector &shape, /* HWOI */ std::vector *image_shape) { MACE_CHECK(shape.size() == 4); image_shape->resize(2); - (*image_shape)[0] = shape[0] * shape[1] * RoundUp(shape[3], 4); - (*image_shape)[1] = RoundUpDiv4(shape[2]); + (*image_shape)[0] = RoundUp(shape[3], 4); + (*image_shape)[1] = shape[0] * shape[1] * RoundUpDiv4(shape[2]); } // [H * W * M, (Ic + 3) / 4] diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 90e4579eb9c53c4870a083f9871001420509318e..f06a7e127359e391a54b28bb4d35891416f32cbb 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -114,6 +114,7 @@ static void Conv2d(int iters, BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, OPENCL); \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, OPENCL); + BM_CONV_2D(1, 256, 64, 64, 3, 3, 1, 1, VALID, 256); BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, 1, VALID, 1024); @@ -135,6 +136,8 @@ BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, 1, SAME, 128); BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, 1, SAME, 128); BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, 1, SAME, 128); +BM_CONV_2D(1, 1024, 16, 16, 15, 1, 1, 1, SAME, 2); + // Dilation BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 2, VALID, 32); BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 4, VALID, 32); diff --git a/mace/public/BUILD b/mace/public/BUILD index f51c85b8c15a3d4588ab60f5a3f9cc57ae81681a..41c709878728c120daf7f2c82954f04b4a7e1809 100644 --- a/mace/public/BUILD +++ b/mace/public/BUILD @@ -7,11 +7,11 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android") - cc_library( name = "public", hdrs = [ "mace.h", + "mace_runtime.h", + "mace_types.h", ], ) diff --git a/mace/public/mace.h b/mace/public/mace.h index eb74e6749517b1c7cd616a0073bd0c3c7965f916..db57fbcb9eaf27c3f8ea2f16ba062c6ca73a09fd 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -2,6 +2,9 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +// This file defines core MACE APIs. +// There APIs will be stable and backward compatible. + #ifndef MACE_PUBLIC_MACE_H_ #define MACE_PUBLIC_MACE_H_ @@ -13,415 +16,60 @@ namespace mace { -#define MACE_MAJOR_VERSION 0 -#define MACE_MINOR_VERSION 1 -#define MACE_PATCH_VERSION 0 - -// MACE_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", -// "-beta", "-rc", "-rc.1") -#define MACE_VERSION_SUFFIX "" - -#define MACE_STR_HELPER(x) #x -#define MACE_STR(x) MACE_STR_HELPER(x) - -// e.g. "0.5.0" or "0.6.0-alpha". -#define MACE_VERSION_STRING \ - (MACE_STR(MACE_MAJOR_VERSION) "." MACE_STR(MACE_MINOR_VERSION) "." MACE_STR( \ - MACE_PATCH_VERSION) MACE_VERSION_SUFFIX) - -inline const char *MaceVersion() { return MACE_VERSION_STRING; } - -extern const char *MaceGitVersion(); - -// Disable the copy and assignment operator for a class. -#ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ - private: \ - classname(const classname &) = delete; \ - classname &operator=(const classname &) = delete -#endif - -enum NetMode { INIT = 0, NORMAL = 1 }; +const char *MaceVersion(); enum DeviceType { CPU = 0, NEON = 1, OPENCL = 2, HEXAGON = 3 }; -enum DataType { - DT_INVALID = 0, - DT_FLOAT = 1, - DT_DOUBLE = 2, - DT_INT32 = 3, - DT_UINT8 = 4, - DT_INT16 = 5, - DT_INT8 = 6, - DT_STRING = 7, - DT_INT64 = 8, - DT_UINT16 = 9, - DT_BOOL = 10, - DT_HALF = 19, - DT_UINT32 = 22 -}; - -enum GPUPerfHint { - PERF_DEFAULT = 0, - PERF_LOW = 1, - PERF_NORMAL = 2, - PERF_HIGH = 3 -}; - -enum GPUPriorityHint { - PRIORITY_DEFAULT = 0, - PRIORITY_LOW = 1, - PRIORITY_NORMAL = 2, - PRIORITY_HIGH = 3 -}; - -enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2}; - -class ConstTensor { - public: - ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const DataType data_type = DT_FLOAT, - uint32_t node_id = 0); - ConstTensor(const std::string &name, - const unsigned char *data, - const std::vector &dims, - const int data_type, - uint32_t node_id = 0); - - const std::string &name() const; - const unsigned char *data() const; - int64_t data_size() const; - const std::vector &dims() const; - DataType data_type() const; - uint32_t node_id() const; - - private: - const std::string name_; - const unsigned char *data_; - const int64_t data_size_; - const std::vector dims_; - const DataType data_type_; - const uint32_t node_id_; -}; - -class Argument { - public: - Argument(); - void CopyFrom(const Argument &from); - - public: - const std::string &name() const; - void set_name(const std::string &value); - bool has_f() const; - float f() const; - void set_f(float value); - bool has_i() const; - int64_t i() const; - void set_i(int64_t value); - bool has_s() const; - std::string s() const; - void set_s(const std::string &value); - const std::vector &floats() const; - void add_floats(float value); - void set_floats(const std::vector &value); - const std::vector &ints() const; - void add_ints(int64_t value); - void set_ints(const std::vector &value); - const std::vector &strings() const; - void add_strings(const ::std::string &value); - void set_strings(const std::vector &value); - - private: - void set_has_f(); - void set_has_i(); - void set_has_s(); - - private: - std::string name_; - float f_; - int64_t i_; - std::string s_; - std::vector floats_; - std::vector ints_; - std::vector strings_; - uint32_t has_bits_; -}; - -class NodeInput { - public: - NodeInput() {} - NodeInput(int node_id, int output_port); - void CopyFrom(const NodeInput &from); - - public: - int node_id() const; - void set_node_id(int node_id); - int output_port() const; - void set_output_port(int output_port); - - private: - int node_id_; - int output_port_; -}; - -class OutputShape { - public: - OutputShape(); - OutputShape(const std::vector &dims); // NOLINT(runtime/explicit) - void CopyFrom(const OutputShape &from); - - public: - const std::vector &dims() const; - - private: - std::vector dims_; -}; - -class OperatorDef { - public: - void CopyFrom(const OperatorDef &from); - - public: - const std::string &name() const; - void set_name(const std::string &name_); - bool has_name() const; - const std::string &type() const; - void set_type(const std::string &type_); - bool has_type() const; - const std::vector &mem_id() const; - void set_mem_id(const std::vector &value); - uint32_t node_id() const; - void set_node_id(uint32_t node_id); - uint32_t op_id() const; - uint32_t padding() const; - void set_padding(uint32_t padding); - const std::vector &node_input() const; - void add_node_input(const NodeInput &value); - const std::vector &out_max_byte_size() const; - void add_out_max_byte_size(int value); - const std::vector &input() const; - const std::string &input(int index) const; - std::string *add_input(); - void add_input(const ::std::string &value); - void add_input(::std::string &&value); - void set_input(const std::vector &value); - const std::vector &output() const; - const std::string &output(int index) const; - std::string *add_output(); - void add_output(const ::std::string &value); - void add_output(::std::string &&value); - void set_output(const std::vector &value); - const std::vector &arg() const; - Argument *add_arg(); - const std::vector &output_shape() const; - void add_output_shape(const OutputShape &value); - const std::vector &output_type() const; - void set_output_type(const std::vector &value); - - private: - void set_has_name(); - void set_has_type(); - void set_has_mem_id(); - - private: - std::string name_; - std::string type_; - - std::vector input_; - std::vector output_; - std::vector arg_; - std::vector output_shape_; - std::vector output_type_; - - std::vector mem_id_; - - // nnlib - uint32_t node_id_; - uint32_t op_id_; - uint32_t padding_; - std::vector node_input_; - std::vector out_max_byte_size_; - - uint32_t has_bits_; -}; - -class MemoryBlock { - public: - MemoryBlock(int mem_id, uint32_t x, uint32_t y); - - public: - int mem_id() const; - uint32_t x() const; - uint32_t y() const; - - private: - int mem_id_; - uint32_t x_; - uint32_t y_; -}; - -class MemoryArena { - public: - const std::vector &mem_block() const; - std::vector &mutable_mem_block(); - int mem_block_size() const; - - private: - std::vector mem_block_; -}; +enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; -// for hexagon mace-nnlib -class InputInfo { +// MACE input/output tensor +class MaceTensor { public: - const std::string &name() const; - int32_t node_id() const; - int32_t max_byte_size() const; - DataType data_type() const; - const std::vector &dims() const; + // shape - the shape of the tensor, with size n + // data - the buffer of the tensor, must not be null with size equals + // shape[0] * shape[1] * ... * shape[n-1] + explicit MaceTensor(const std::vector &shape, + std::shared_ptr data); + MaceTensor(); + MaceTensor(const MaceTensor &other); + MaceTensor(const MaceTensor &&other); + MaceTensor &operator=(const MaceTensor &other); + MaceTensor &operator=(const MaceTensor &&other); + ~MaceTensor(); - private: - std::string name_; - int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len - DataType data_type_; - std::vector dims_; -}; - -class OutputInfo { - public: - const std::string &name() const; - int32_t node_id() const; - int32_t max_byte_size() const; - DataType data_type() const; - void set_data_type(DataType data_type); - const std::vector &dims() const; - void set_dims(const std::vector &dims); + const std::vector &shape() const; + const std::shared_ptr data() const; + std::shared_ptr data(); private: - std::string name_; - int32_t node_id_; - int32_t max_byte_size_; // only support 32-bit len - DataType data_type_; - std::vector dims_; + class Impl; + std::unique_ptr impl_; }; -class NetDef { - public: - NetDef(); - int op_size() const; - - const OperatorDef &op(const int idx) const; - - public: - const std::string &name() const; - bool has_name() const; - void set_name(const std::string &value); - const std::string &version() const; - bool has_version() const; - void set_version(const std::string &value); - - const std::vector &op() const; - OperatorDef *add_op(); - std::vector &mutable_op(); - const std::vector &arg() const; - Argument *add_arg(); - std::vector &mutable_arg(); - const std::vector &tensors() const; - std::vector &mutable_tensors(); - const MemoryArena &mem_arena() const; - bool has_mem_arena() const; - MemoryArena &mutable_mem_arena(); - const std::vector &input_info() const; - const std::vector &output_info() const; - std::vector &mutable_output_info(); - - private: - void set_has_name(); - void set_has_version(); - void set_has_mem_arena(); - - private: - std::string name_; - std::string version_; - std::vector op_; - std::vector arg_; - std::vector tensors_; - - // for mem optimization - MemoryArena mem_arena_; - - // for hexagon mace-nnlib - std::vector input_info_; - std::vector output_info_; - - uint32_t has_bits_; -}; - -struct CallStats { - int64_t start_micros; - int64_t end_micros; -}; - -struct OperatorStats { - std::string operator_name; - std::string type; - CallStats stats; -}; - -struct RunMetadata { - std::vector op_stats; -}; - -class Workspace; -class NetBase; -class OperatorRegistry; -class HexagonControlWrapper; - -struct MaceInputInfo { - std::string name; - std::vector shape; - const float *data; -}; - -void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint); -void ConfigOmpThreadsAndAffinity(int omp_num_threads, - CPUPowerOption power_option); +class NetDef; +class RunMetadata; class MaceEngine { public: - // Single input and output - explicit MaceEngine(const NetDef *net_def, DeviceType device_type); - // Multiple input or output explicit MaceEngine(const NetDef *net_def, DeviceType device_type, const std::vector &input_nodes, const std::vector &output_nodes); ~MaceEngine(); - // Single input and output - bool Run(const float *input, - const std::vector &input_shape, - float *output); - // Single input and output for benchmark - bool Run(const float *input, - const std::vector &input_shape, - float *output, - RunMetadata *run_metadata); - // Multiple input or output - bool Run( - const std::vector &input, - std::map &output, // NOLINT(runtime/references) - RunMetadata *run_metadata = nullptr); - MaceEngine(const MaceEngine &) = delete; - MaceEngine &operator=(const MaceEngine &) = delete; + + MaceStatus Run(const std::map &inputs, + std::map *outputs); + + MaceStatus Run(const std::map &inputs, + std::map *outputs, + RunMetadata *run_metadata); private: - std::shared_ptr op_registry_; - DeviceType device_type_; - std::unique_ptr ws_; - std::unique_ptr net_; - std::unique_ptr hexagon_controller_; + class Impl; + std::unique_ptr impl_; + + MaceEngine(const MaceEngine &) = delete; + MaceEngine &operator=(const MaceEngine &) = delete; }; } // namespace mace diff --git a/mace/public/mace_runtime.h b/mace/public/mace_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..1887f00993301198d33f6a32e5d601c14b8a998b --- /dev/null +++ b/mace/public/mace_runtime.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +// This file defines runtime tuning APIs. +// These APIs are not stable. + +#ifndef MACE_PUBLIC_MACE_RUNTIME_H_ +#define MACE_PUBLIC_MACE_RUNTIME_H_ + +namespace mace { + +enum GPUPerfHint { + PERF_DEFAULT = 0, + PERF_LOW = 1, + PERF_NORMAL = 2, + PERF_HIGH = 3 +}; + +enum GPUPriorityHint { + PRIORITY_DEFAULT = 0, + PRIORITY_LOW = 1, + PRIORITY_NORMAL = 2, + PRIORITY_HIGH = 3 +}; + +enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2 }; + +void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint); +void ConfigOmpThreadsAndAffinity(int omp_num_threads, + CPUPowerOption power_option); + +} // namespace mace + +#endif // MACE_PUBLIC_MACE_RUNTIME_H_ diff --git a/mace/public/mace_types.h b/mace/public/mace_types.h new file mode 100644 index 0000000000000000000000000000000000000000..141825830b1faa82d9cca82fee48b839ed6c0d1b --- /dev/null +++ b/mace/public/mace_types.h @@ -0,0 +1,341 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +// This file defines data types used by net creation and benchmark tools. +// These APIs are not stable and should only be used by advanced users. + +#ifndef MACE_PUBLIC_MACE_TYPES_H_ +#define MACE_PUBLIC_MACE_TYPES_H_ + +#include +#include + +namespace mace { + +// Disable the copy and assignment operator for a class. +#ifndef DISABLE_COPY_AND_ASSIGN +#define DISABLE_COPY_AND_ASSIGN(classname) \ + private: \ + classname(const classname &) = delete; \ + classname &operator=(const classname &) = delete +#endif + +enum NetMode { INIT = 0, NORMAL = 1 }; + +enum DataType { + DT_INVALID = 0, + DT_FLOAT = 1, + DT_DOUBLE = 2, + DT_INT32 = 3, + DT_UINT8 = 4, + DT_INT16 = 5, + DT_INT8 = 6, + DT_STRING = 7, + DT_INT64 = 8, + DT_UINT16 = 9, + DT_BOOL = 10, + DT_HALF = 19, + DT_UINT32 = 22 +}; + +class ConstTensor { + public: + ConstTensor(const std::string &name, + const unsigned char *data, + const std::vector &dims, + const DataType data_type = DT_FLOAT, + uint32_t node_id = 0); + ConstTensor(const std::string &name, + const unsigned char *data, + const std::vector &dims, + const int data_type, + uint32_t node_id = 0); + + const std::string &name() const; + const unsigned char *data() const; + int64_t data_size() const; + const std::vector &dims() const; + DataType data_type() const; + uint32_t node_id() const; + + private: + const std::string name_; + const unsigned char *data_; + const int64_t data_size_; + const std::vector dims_; + const DataType data_type_; + const uint32_t node_id_; +}; + +class Argument { + public: + Argument(); + void CopyFrom(const Argument &from); + + public: + const std::string &name() const; + void set_name(const std::string &value); + bool has_f() const; + float f() const; + void set_f(float value); + bool has_i() const; + int64_t i() const; + void set_i(int64_t value); + bool has_s() const; + std::string s() const; + void set_s(const std::string &value); + const std::vector &floats() const; + void add_floats(float value); + void set_floats(const std::vector &value); + const std::vector &ints() const; + void add_ints(int64_t value); + void set_ints(const std::vector &value); + const std::vector &strings() const; + void add_strings(const ::std::string &value); + void set_strings(const std::vector &value); + + private: + void set_has_f(); + void set_has_i(); + void set_has_s(); + + private: + std::string name_; + float f_; + int64_t i_; + std::string s_; + std::vector floats_; + std::vector ints_; + std::vector strings_; + uint32_t has_bits_; +}; + +class NodeInput { + public: + NodeInput() {} + NodeInput(int node_id, int output_port); + void CopyFrom(const NodeInput &from); + + public: + int node_id() const; + void set_node_id(int node_id); + int output_port() const; + void set_output_port(int output_port); + + private: + int node_id_; + int output_port_; +}; + +class OutputShape { + public: + OutputShape(); + OutputShape(const std::vector &dims); // NOLINT(runtime/explicit) + void CopyFrom(const OutputShape &from); + + public: + const std::vector &dims() const; + + private: + std::vector dims_; +}; + +class OperatorDef { + public: + void CopyFrom(const OperatorDef &from); + + public: + const std::string &name() const; + void set_name(const std::string &name_); + bool has_name() const; + const std::string &type() const; + void set_type(const std::string &type_); + bool has_type() const; + const std::vector &mem_id() const; + void set_mem_id(const std::vector &value); + uint32_t node_id() const; + void set_node_id(uint32_t node_id); + uint32_t op_id() const; + uint32_t padding() const; + void set_padding(uint32_t padding); + const std::vector &node_input() const; + void add_node_input(const NodeInput &value); + const std::vector &out_max_byte_size() const; + void add_out_max_byte_size(int value); + const std::vector &input() const; + const std::string &input(int index) const; + std::string *add_input(); + void add_input(const ::std::string &value); + void add_input(::std::string &&value); + void set_input(const std::vector &value); + const std::vector &output() const; + const std::string &output(int index) const; + std::string *add_output(); + void add_output(const ::std::string &value); + void add_output(::std::string &&value); + void set_output(const std::vector &value); + const std::vector &arg() const; + Argument *add_arg(); + const std::vector &output_shape() const; + void add_output_shape(const OutputShape &value); + const std::vector &output_type() const; + void set_output_type(const std::vector &value); + + private: + void set_has_name(); + void set_has_type(); + void set_has_mem_id(); + + private: + std::string name_; + std::string type_; + + std::vector input_; + std::vector output_; + std::vector arg_; + std::vector output_shape_; + std::vector output_type_; + + std::vector mem_id_; + + // nnlib + uint32_t node_id_; + uint32_t op_id_; + uint32_t padding_; + std::vector node_input_; + std::vector out_max_byte_size_; + + uint32_t has_bits_; +}; + +class MemoryBlock { + public: + MemoryBlock(int mem_id, uint32_t x, uint32_t y); + + public: + int mem_id() const; + uint32_t x() const; + uint32_t y() const; + + private: + int mem_id_; + uint32_t x_; + uint32_t y_; +}; + +class MemoryArena { + public: + const std::vector &mem_block() const; + std::vector &mutable_mem_block(); + int mem_block_size() const; + + private: + std::vector mem_block_; +}; + +// for hexagon mace-nnlib +class InputInfo { + public: + const std::string &name() const; + int32_t node_id() const; + int32_t max_byte_size() const; + DataType data_type() const; + const std::vector &dims() const; + + private: + std::string name_; + int32_t node_id_; + int32_t max_byte_size_; // only support 32-bit len + DataType data_type_; + std::vector dims_; +}; + +class OutputInfo { + public: + const std::string &name() const; + int32_t node_id() const; + int32_t max_byte_size() const; + DataType data_type() const; + void set_data_type(DataType data_type); + const std::vector &dims() const; + void set_dims(const std::vector &dims); + + private: + std::string name_; + int32_t node_id_; + int32_t max_byte_size_; // only support 32-bit len + DataType data_type_; + std::vector dims_; +}; + +class NetDef { + public: + NetDef(); + int op_size() const; + + const OperatorDef &op(const int idx) const; + + public: + const std::string &name() const; + bool has_name() const; + void set_name(const std::string &value); + const std::string &version() const; + bool has_version() const; + void set_version(const std::string &value); + + const std::vector &op() const; + OperatorDef *add_op(); + std::vector &mutable_op(); + const std::vector &arg() const; + Argument *add_arg(); + std::vector &mutable_arg(); + const std::vector &tensors() const; + std::vector &mutable_tensors(); + const MemoryArena &mem_arena() const; + bool has_mem_arena() const; + MemoryArena &mutable_mem_arena(); + const std::vector &input_info() const; + const std::vector &output_info() const; + std::vector &mutable_output_info(); + + private: + void set_has_name(); + void set_has_version(); + void set_has_mem_arena(); + + private: + std::string name_; + std::string version_; + std::vector op_; + std::vector arg_; + std::vector tensors_; + + // for mem optimization + MemoryArena mem_arena_; + + // for hexagon mace-nnlib + std::vector input_info_; + std::vector output_info_; + + uint32_t has_bits_; +}; + +struct CallStats { + int64_t start_micros; + int64_t end_micros; +}; + +struct OperatorStats { + std::string operator_name; + std::string type; + CallStats stats; +}; + +struct RunMetadata { + std::vector op_stats; +}; + +} // namespace mace + +#endif // MACE_PUBLIC_MACE_TYPES_H_ diff --git a/mace/python/tools/caffe_converter_lib.py b/mace/python/tools/caffe_converter_lib.py index 7c7cd9abd71cb8b4720f782ffc71835033c3e97c..166bb6ec3012f2f3075e6dc5577dd5a9e6832463 100644 --- a/mace/python/tools/caffe_converter_lib.py +++ b/mace/python/tools/caffe_converter_lib.py @@ -72,9 +72,9 @@ class Shapes(object): output_shape = np.zeros_like(input_shape) output_shape[0] = input_shape[0] output_shape[1] = int(round_func((input_shape[1] + paddings[0] - filter_shape[0] - - (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1 + - (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1 output_shape[2] = int(round_func((input_shape[2] + paddings[1] - filter_shape[1] - - (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1 + - (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1 output_shape[3] = filter_shape[2] return output_shape @@ -247,12 +247,9 @@ class CaffeConverter(object): arg.i = self.dt return output_name - def add_input_transform(self, names, is_single): + def add_input_transform(self, names): for name in names: - if is_single: - new_input_name = MACE_INPUT_NODE_NAME + ":0" - else: - new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0" + new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0" op_def = self.net_def.op.add() op_def.name = name op_def.type = 'BufferToImage' @@ -267,12 +264,9 @@ class CaffeConverter(object): arg.name = 'T' arg.i = self.dt - def add_output_transform(self, names, is_single): + def add_output_transform(self, names): for name in names: - if is_single: - output_name = MACE_OUTPUT_NODE_NAME + ":0" - else: - output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" + output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" op_def = self.net_def.op.add() op_def.name = output_name[:-2] op_def.type = 'ImageToBuffer' @@ -333,8 +327,18 @@ class CaffeConverter(object): return pad, stride, kernel def convert_conv2d(self, op): - op_def = self.CommonConvert(op, 'Conv2D') param = op.layer.convolution_param + is_depthwise = False + if param.HasField('group'): + if param.group == op.data[0].shape[0] and op.data[0].shape[1] == 1: + is_depthwise = True + else: + raise Exception("Mace do not support group convolution yet") + + if is_depthwise: + op_def = self.CommonConvert(op, 'DepthwiseConv2d') + else: + op_def = self.CommonConvert(op, 'Conv2D') # Add filter weight_tensor_name = op.name + '_weight:0' @@ -342,7 +346,7 @@ class CaffeConverter(object): self.add_tensor(weight_tensor_name, weight_data) if self.device == 'gpu': - buffer_type = "CONV2D_FILTER" + buffer_type = "DW_CONV2D_FILTER" if is_depthwise else "CONV2D_FILTER" output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type) op_def.input.extend([output_name]) else: @@ -373,15 +377,16 @@ class CaffeConverter(object): self.resolved_ops.add(op.name) output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape_map[op.layer.bottom[0]], - weight_data.shape, - paddings, strides, dilations, - math.floor) + weight_data.shape, + paddings, strides, dilations, + math.floor) op.output_shape_map[op.layer.top[0]] = output_shape if len(self.ops_map[final_op.name].children) == 1 \ and self.ops_map[final_op.name].children[0].type in activation_name_map: activation_op = self.ops_map[final_op.name].children[0] - op_def.type = "FusedConv2D" + if not is_depthwise: + op_def.type = "FusedConv2D" fused_act_arg = op_def.arg.add() fused_act_arg.name = 'activation' fused_act_arg.s = activation_name_map[activation_op.type] @@ -412,7 +417,7 @@ class CaffeConverter(object): width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2) return self.winograd and self.device == 'gpu' and \ filter_shape[0] == 3 and (filter_shape[0] == filter_shape[1]) and \ - dilations[0] == 1 and (dilations[0] == dilations[1]) and\ + dilations[0] == 1 and (dilations[0] == dilations[1]) and \ (strides[0] == 1) and (strides[0] == strides[1]) and \ (16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \ (16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \ @@ -662,7 +667,7 @@ class CaffeConverter(object): filter_shape = [kernels[0], kernels[1], input_shape[3], input_shape[3]] output_shape = Shapes.conv_pool_shape(input_shape, filter_shape, - paddings, strides, [1, 1], math.ceil) + paddings, strides, [1, 1], math.ceil) op.output_shape_map[op.layer.top[0]] = output_shape op_def.output.extend([op.name + ':0']) @@ -764,7 +769,7 @@ class CaffeConverter(object): input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]] num_outputs = len(op.layer.top) if (input_shape[3] % num_outputs) != 0 or \ - (self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) : + (self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) : raise Exception('Mace do not support slice with input shape ' + str(input_shape) + ' and number of output ' + str(num_outputs)) output_shape = Shapes.slice_shape(input_shape, num_outputs) @@ -789,7 +794,6 @@ class CaffeConverter(object): input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]] output_shape = input_shape shape_param = np.asarray(op.layer.reshape_param.shape.dim)[[0, 3, 2, 1]] - print shape_param for i in range(len(shape_param)): if shape_param[i] != 0: output_shape[i] = shape_param[i] @@ -844,29 +848,20 @@ class CaffeConverter(object): self.net_def.op.extend([op_def]) self.resolved_ops.add(op.name) - def replace_in_out_name(self, input_names, output_names, is_single): + def replace_in_out_name(self, input_names, output_names): in_names = set([input_name + ":0" for input_name in input_names]) out_names = set([output_name + ":0" for output_name in output_names]) - if is_single: - for op in self.net_def.op: - for i in range(len(op.input)): - if op.input[i] in in_names: - op.input[i] = MACE_INPUT_NODE_NAME + ':0' - for i in range(len(op.output)): - if op.output[i] in out_names: - op.output[i] = MACE_OUTPUT_NODE_NAME + ':0' - else: - for op in self.net_def.op: - for i in range(len(op.input)): - if op.input[i] in in_names: - op.input[i] = MACE_INPUT_NODE_NAME + '_' + op.input[i] - if op.input[i] in out_names: - op.input[i] = MACE_OUTPUT_NODE_NAME + '_' + op.input[i] - for i in range(len(op.output)): - if op.output[i] in in_names: - op.output[i] = MACE_INPUT_NODE_NAME + '_' + op.output[i] - if op.output[i] in out_names: - op.output[i] = MACE_OUTPUT_NODE_NAME + '_' + op.output[i] + for op in self.net_def.op: + for i in range(len(op.input)): + if op.input[i] in in_names: + op.input[i] = MACE_INPUT_NODE_NAME + '_' + op.input[i] + if op.input[i] in out_names: + op.input[i] = MACE_OUTPUT_NODE_NAME + '_' + op.input[i] + for i in range(len(op.output)): + if op.output[i] in in_names: + op.output[i] = MACE_INPUT_NODE_NAME + '_' + op.output[i] + if op.output[i] in out_names: + op.output[i] = MACE_OUTPUT_NODE_NAME + '_' + op.output[i] def add_input_op_shape(self, input_nodes, input_shapes): assert len(input_nodes) == len(input_shapes) @@ -878,9 +873,8 @@ class CaffeConverter(object): input_op.output_shape_map[input_op.name] = input_shapes[i] def convert(self, input_nodes, input_shapes, output_nodes): - is_single = len(input_nodes) == 1 and len(output_nodes) == 1 if self.device == 'gpu': - self.add_input_transform(input_nodes, is_single) + self.add_input_transform(input_nodes) assert self.ops[0].type == 'Input' self.add_input_op_shape(input_nodes, input_shapes) @@ -925,10 +919,10 @@ class CaffeConverter(object): raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) if self.device == 'gpu': - self.add_output_transform(output_nodes, is_single) + self.add_output_transform(output_nodes) if self.device == 'cpu': - self.replace_in_out_name(input_nodes, output_nodes, is_single) + self.replace_in_out_name(input_nodes, output_nodes) for op in self.ops: if op.name not in self.resolved_ops: @@ -967,3 +961,4 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str, print "Memory optimization done." return net_def + diff --git a/mace/python/tools/model_header.jinja2 b/mace/python/tools/model_header.jinja2 index 9f5c776d52bd6456bf3c410216f5b4de1ce1fa58..efd477ee253ccead1ebf2f58c54f624629118280 100644 --- a/mace/python/tools/model_header.jinja2 +++ b/mace/python/tools/model_header.jinja2 @@ -1,7 +1,10 @@ // // Copyright (c) 2017 XiaoMi All rights reserved. -// Generated by the mace converter. DO NOT EDIT! // +// Generated by the mace converter. DO NOT EDIT! + +#ifndef MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_ +#define MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_ #include @@ -10,13 +13,16 @@ namespace mace { namespace {{tag}} { -extern const unsigned char *LoadModelData(const char *model_data_file); +const unsigned char *LoadModelData(const char *model_data_file); -extern void UnloadModelData(const unsigned char *model_data); +void UnloadModelData(const unsigned char *model_data); -extern NetDef CreateNet(const unsigned char *model_data); +NetDef CreateNet(const unsigned char *model_data); -extern const std::string ModelChecksum(); +const std::string ModelChecksum(); } // namespace {{ tag }} } // namespace mace + +#endif // MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_ + diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index 7177a691dabae5bac8fe0fd884d05850d4bac586..01e73645e8439d77ba5b4b6bada8f84e7c3eae9a 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -118,12 +118,9 @@ class TFConverter(object): arg.i = self.dt return output_name - def add_input_transform(self, names, is_single): + def add_input_transform(self, names): for name in names: - if is_single: - new_input_name = MACE_INPUT_NODE_NAME + ":0" - else: - new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0" + new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0" op_def = self.net_def.op.add() op_def.name = name op_def.type = 'BufferToImage' @@ -138,12 +135,9 @@ class TFConverter(object): arg.name = 'T' arg.i = self.dt - def add_output_transform(self, names, is_single): + def add_output_transform(self, names): for name in names: - if is_single: - output_name = MACE_OUTPUT_NODE_NAME + ":0" - else: - output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" + output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" op_def = self.net_def.op.add() op_def.name = output_name[:-2] op_def.type = 'ImageToBuffer' @@ -362,7 +356,8 @@ class TFConverter(object): if len(self.tf_graph.get(final_op.name, [])) == 1 \ and self.tf_graph[final_op.name][0].type in activation_name_map: activation_op = self.tf_graph[final_op.name][0] - op_def.type = "FusedConv2D" + if op_def.type == "Conv2D": + op_def.type = "FusedConv2D" fused_act_arg = op_def.arg.add() fused_act_arg.name = 'activation' fused_act_arg.s = activation_name_map[activation_op.type] @@ -805,26 +800,18 @@ class TFConverter(object): self.add_output_shape(op.outputs, op_def) self.resolved_ops[op.name] = 1 - def replace_in_out_name(self, input_names, output_names, is_single): + def replace_in_out_name(self, input_names, output_names): in_names = set([input_name + ":0" for input_name in input_names]) out_names = set([output_name + ":0" for output_name in output_names]) - if is_single: - for op in self.net_def.op: - if len(op.input) > 0 and op.input[0] in in_names: - op.input[0] = MACE_INPUT_NODE_NAME + ':0' - if len(op.output) > 0 and op.output[0] in out_names: - op.output[0] = MACE_OUTPUT_NODE_NAME + ':0' - else: - for op in self.net_def.op: - if len(op.input) > 0 and op.input[0] in in_names: - op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0] - if len(op.output) > 0 and op.output[0] in out_names: - op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0] + for op in self.net_def.op: + if op.input[0] in in_names: + op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0] + if op.output[0] in out_names: + op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0] def convert(self, input_nodes, output_nodes): - is_single = len(input_nodes) == 1 and len(output_nodes) == 1 if self.device == 'gpu': - self.add_input_transform(input_nodes, is_single) + self.add_input_transform(input_nodes) for op in self.tf_ops: if self.resolved_ops[op.name] == 1: @@ -892,10 +879,10 @@ class TFConverter(object): raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) if self.device == 'gpu': - self.add_output_transform(output_nodes, is_single) + self.add_output_transform(output_nodes) if self.device == 'cpu': - self.replace_in_out_name(input_nodes, output_nodes, is_single) + self.replace_in_out_name(input_nodes, output_nodes) for key in self.resolved_ops: if self.resolved_ops[key] != 1: diff --git a/mace/tools/git/gen_version_source.sh b/mace/tools/git/gen_version_source.sh index 6b17c56c5e5f43a6113806517f59acb15a97a70f..36b0233f1e0edffcefe3079437df45e1cd45bd32 100644 --- a/mace/tools/git/gen_version_source.sh +++ b/mace/tools/git/gen_version_source.sh @@ -25,6 +25,6 @@ cat < ${OUTPUT_FILENAME} // This is a generated file, DO NOT EDIT namespace mace { - const char *MaceGitVersion() { return "${GIT_VERSION}"; } + const char *MaceVersion() { return "${GIT_VERSION}"; } } // namespace mace EOF diff --git a/mace/utils/logging.h b/mace/utils/logging.h index 4a6da3befb985a57d702a1766799722f6271bb36..594381437ea5a9a8261fa540d27c770119921c8f 100644 --- a/mace/utils/logging.h +++ b/mace/utils/logging.h @@ -12,6 +12,7 @@ #include #include "mace/public/mace.h" +#include "mace/public/mace_types.h" #include "mace/utils/env_time.h" #include "mace/utils/string_util.h" diff --git a/tools/mace_tools.py b/tools/mace_tools.py index c9a22f6472e33f8b8245cee9da5796c32d5d5e1d..4f2b209a700439fffd6f466551c0dffceb555805 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -76,26 +76,28 @@ def generate_random_input(target_soc, model_output_dir, target_soc, model_output_dir, int(generate_data_or_not)) run_command(command) - input_name_list = [] input_file_list = [] - if isinstance(input_names, list): - input_name_list.extend(input_names) - else: - input_name_list.append(input_names) if isinstance(input_files, list): input_file_list.extend(input_files) else: input_file_list.append(input_files) - assert len(input_file_list) == len(input_name_list) - for i in range(len(input_file_list)): - if input_file_list[i] is not None: - dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i]) - if input_file_list[i].startswith("http://") or \ - input_file_list[i].startswith("https://"): - urllib.urlretrieve(input_file_list[i], dst_input_file) - else: - print 'Copy input data:', dst_input_file - shutil.copy(input_file_list[i], dst_input_file) + if len(input_file_list) != 0: + input_name_list = [] + if isinstance(input_names, list): + input_name_list.extend(input_names) + else: + input_name_list.append(input_names) + if len(input_file_list) != len(input_name_list): + raise Exception('If input_files set, the input files should match the input names.') + for i in range(len(input_file_list)): + if input_file_list[i] is not None: + dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i]) + if input_file_list[i].startswith("http://") or \ + input_file_list[i].startswith("https://"): + urllib.urlretrieve(input_file_list[i], dst_input_file) + else: + print 'Copy input data:', dst_input_file + shutil.copy(input_file_list[i], dst_input_file) def generate_model_code(): command = "bash tools/generate_model_code.sh"