提交 58a20531 编写于 作者: 刘琦

Merge branch 'load_model_in_pb' into 'master'

load model in pb

See merge request !493
...@@ -16,6 +16,7 @@ cc_library( ...@@ -16,6 +16,7 @@ cc_library(
srcs = ["statistics.cc"], srcs = ["statistics.cc"],
hdrs = ["statistics.h"], hdrs = ["statistics.h"],
linkstatic = 1, linkstatic = 1,
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//mace/kernels", "//mace/kernels",
"//mace/utils", "//mace/utils",
...@@ -27,7 +28,11 @@ cc_binary( ...@@ -27,7 +28,11 @@ cc_binary(
srcs = [ srcs = [
"benchmark_model.cc", "benchmark_model.cc",
], ],
copts = if_android(["-DMACE_ENABLE_OPENCL"]), copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_android(["-DMACE_ENABLE_OPENCL"]),
linkopts = if_openmp_enabled(["-fopenmp"]), linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
...@@ -51,6 +56,7 @@ cc_binary( ...@@ -51,6 +56,7 @@ cc_binary(
srcs = ["model_throughput_test.cc"], srcs = ["model_throughput_test.cc"],
linkopts = if_openmp_enabled(["-fopenmp"]), linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
":libmace_merged", ":libmace_merged",
"//external:gflags_nothreads", "//external:gflags_nothreads",
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/utils/utils.h"
#include "mace/benchmark/statistics.h" #include "mace/benchmark/statistics.h"
#include "mace/codegen/engine/mace_engine_factory.h" #include "mace/codegen/engine/mace_engine_factory.h"
...@@ -189,6 +190,8 @@ DEFINE_string(max_time, "10.0", "length to run max"); ...@@ -189,6 +190,8 @@ DEFINE_string(max_time, "10.0", "length to run max");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "", DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0"); "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(model_file, "",
"model file name, used when load mace model in pb");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
...@@ -202,6 +205,7 @@ int Main(int argc, char **argv) { ...@@ -202,6 +205,7 @@ int Main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "Model name: [" << FLAGS_model_name << "]"; LOG(INFO) << "Model name: [" << FLAGS_model_name << "]";
LOG(INFO) << "Model_file: " << FLAGS_model_file;
LOG(INFO) << "Device: [" << FLAGS_device << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
...@@ -268,22 +272,28 @@ int Main(int argc, char **argv) { ...@@ -268,22 +272,28 @@ int Main(int argc, char **argv) {
std::shared_ptr<mace::MaceEngine> engine; std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status; MaceStatus create_engine_status;
// Create Engine // Create Engine
if (FLAGS_model_data_file.empty()) { const char *model_data_file_ptr =
FLAGS_model_data_file.empty() ? nullptr : FLAGS_model_data_file.c_str();
if (FLAGS_model_file != "") {
std::vector<unsigned char> model_pb_data;
if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
}
create_engine_status = create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(), CreateMaceEngineFromProto(model_pb_data,
nullptr, model_data_file_ptr,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} else { } else {
create_engine_status = create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(), CreateMaceEngineFromCode(FLAGS_model_name,
FLAGS_model_data_file.c_str(), model_data_file_ptr,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} }
if (create_engine_status != MaceStatus::MACE_SUCCESS) { if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments"; LOG(FATAL) << "Create engine error, please check the arguments";
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include <set> #include <set>
#include "mace/kernels/conv_pool_2d_util.h" #include "mace/kernels/conv_pool_2d_util.h"
#include "mace/public/mace_types.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/utils/string_util.h" #include "mace/utils/string_util.h"
...@@ -59,10 +58,10 @@ std::string ShapeToString(const std::vector<OutputShape> &output_shape) { ...@@ -59,10 +58,10 @@ std::string ShapeToString(const std::vector<OutputShape> &output_shape) {
std::stringstream stream; std::stringstream stream;
stream << "["; stream << "[";
for (size_t i = 0; i < output_shape.size(); ++i) { for (size_t i = 0; i < output_shape.size(); ++i) {
const std::vector<index_t> &dims = output_shape[i].dims(); size_t dims_size = output_shape[i].dims_size();
for (size_t j = 0; j < dims.size(); ++j) { for (size_t j = 0; j < dims_size; ++j) {
stream << dims[j]; stream << output_shape[i].dims(j);
if (j != dims.size() - 1) { if (j != dims_size - 1) {
stream << ","; stream << ",";
} }
} }
......
...@@ -10,6 +10,7 @@ cc_library( ...@@ -10,6 +10,7 @@ cc_library(
srcs = glob(["models/*/*.cc"]), srcs = glob(["models/*/*.cc"]),
hdrs = glob(["models/*/*.h"]), hdrs = glob(["models/*/*.h"]),
linkstatic = 1, linkstatic = 1,
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//mace/core", "//mace/core",
"//mace/ops", "//mace/ops",
...@@ -19,24 +20,28 @@ cc_library( ...@@ -19,24 +20,28 @@ cc_library(
cc_library( cc_library(
name = "generated_opencl", name = "generated_opencl",
srcs = glob(["opencl/*.cc"]), srcs = glob(["opencl/*.cc"]),
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
linkstatic = 1, linkstatic = 1,
) )
cc_library( cc_library(
name = "generated_tuning_params", name = "generated_tuning_params",
srcs = ["tuning/tuning_params.cc"], srcs = ["tuning/tuning_params.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
linkstatic = 1, linkstatic = 1,
) )
cc_library( cc_library(
name = "generated_version", name = "generated_version",
srcs = ["version/version.cc"], srcs = ["version/version.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
linkstatic = 1, linkstatic = 1,
) )
cc_library( cc_library(
name = "generated_mace_engine_factory", name = "generated_mace_engine_factory",
hdrs = ["engine/mace_engine_factory.h"], hdrs = ["engine/mace_engine_factory.h"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//mace/public", "//mace/public",
], ],
......
...@@ -43,7 +43,11 @@ cc_library( ...@@ -43,7 +43,11 @@ cc_library(
"runtime/opencl/*.h", "runtime/opencl/*.h",
], ],
)) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])), )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])),
copts = if_openmp_enabled([ copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp", "-fopenmp",
"-DMACE_ENABLE_OPENMP", "-DMACE_ENABLE_OPENMP",
]) + if_android([ ]) + if_android([
...@@ -58,8 +62,9 @@ cc_library( ...@@ -58,8 +62,9 @@ cc_library(
"-lm", "-lm",
]), ]),
deps = [ deps = [
"//mace/utils",
"//mace/codegen:generated_version", "//mace/codegen:generated_version",
"//mace/proto:mace_cc",
"//mace/utils",
] + if_android([ ] + if_android([
":opencl_headers", ":opencl_headers",
"//mace/codegen:generated_opencl", "//mace/codegen:generated_opencl",
...@@ -79,6 +84,7 @@ cc_library( ...@@ -79,6 +84,7 @@ cc_library(
hdrs = glob([ hdrs = glob([
"runtime/opencl/*.h", "runtime/opencl/*.h",
]), ]),
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"@opencl_clhpp//:opencl_clhpp", "@opencl_clhpp//:opencl_clhpp",
"@opencl_headers//:opencl20_headers", "@opencl_headers//:opencl20_headers",
...@@ -95,6 +101,7 @@ cc_library( ...@@ -95,6 +101,7 @@ cc_library(
hdrs = [ hdrs = [
"testing/test_benchmark.h", "testing/test_benchmark.h",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
":core", ":core",
"//external:gflags_nothreads", "//external:gflags_nothreads",
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
#include <vector> #include <vector>
#include <map> #include <map>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_types.h"
namespace mace { namespace mace {
......
...@@ -12,6 +12,12 @@ ...@@ -12,6 +12,12 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <memory> #include <memory>
#include "mace/core/net.h" #include "mace/core/net.h"
...@@ -88,7 +94,8 @@ class MaceEngine::Impl { ...@@ -88,7 +94,8 @@ class MaceEngine::Impl {
MaceStatus Init(const NetDef *net_def, MaceStatus Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes); const std::vector<std::string> &output_nodes,
const unsigned char *model_data);
MaceStatus Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
std::map<std::string, MaceTensor> *outputs, std::map<std::string, MaceTensor> *outputs,
...@@ -106,7 +113,6 @@ class MaceEngine::Impl { ...@@ -106,7 +113,6 @@ class MaceEngine::Impl {
DISABLE_COPY_AND_ASSIGN(Impl); DISABLE_COPY_AND_ASSIGN(Impl);
}; };
MaceEngine::Impl::Impl(DeviceType device_type) MaceEngine::Impl::Impl(DeviceType device_type)
: op_registry_(new OperatorRegistry()), : op_registry_(new OperatorRegistry()),
device_type_(device_type), device_type_(device_type),
...@@ -120,7 +126,8 @@ MaceEngine::Impl::Impl(DeviceType device_type) ...@@ -120,7 +126,8 @@ MaceEngine::Impl::Impl(DeviceType device_type)
MaceStatus MaceEngine::Impl::Init( MaceStatus MaceEngine::Impl::Init(
const NetDef *net_def, const NetDef *net_def,
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes) { const std::vector<std::string> &output_nodes,
const unsigned char *model_data) {
LOG(INFO) << "MACE version: " << MaceVersion(); LOG(INFO) << "MACE version: " << MaceVersion();
// Set storage path for internal usage // Set storage path for internal usage
for (auto input_name : input_nodes) { for (auto input_name : input_nodes) {
...@@ -141,14 +148,15 @@ MaceStatus MaceEngine::Impl::Init( ...@@ -141,14 +148,15 @@ MaceStatus MaceEngine::Impl::Init(
int dsp_mode = int dsp_mode =
ArgumentHelper::GetSingleArgument<NetDef, int>(*net_def, "dsp_mode", 0); ArgumentHelper::GetSingleArgument<NetDef, int>(*net_def, "dsp_mode", 0);
hexagon_controller_->SetGraphMode(dsp_mode); hexagon_controller_->SetGraphMode(dsp_mode);
MACE_CHECK(hexagon_controller_->SetupGraph(*net_def), MACE_CHECK(hexagon_controller_->SetupGraph(*net_def, model_data),
"hexagon setup graph error"); "hexagon setup graph error");
if (VLOG_IS_ON(2)) { if (VLOG_IS_ON(2)) {
hexagon_controller_->PrintGraph(); hexagon_controller_->PrintGraph();
} }
} else { } else {
#endif #endif
MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_); MaceStatus status =
ws_->LoadModelTensor(*net_def, device_type_, model_data);
if (status != MaceStatus::MACE_SUCCESS) { if (status != MaceStatus::MACE_SUCCESS) {
return status; return status;
} }
...@@ -260,8 +268,9 @@ MaceEngine::~MaceEngine() = default; ...@@ -260,8 +268,9 @@ MaceEngine::~MaceEngine() = default;
MaceStatus MaceEngine::Init(const NetDef *net_def, MaceStatus MaceEngine::Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes) { const std::vector<std::string> &output_nodes,
return impl_->Init(net_def, input_nodes, output_nodes); const unsigned char *model_data) {
return impl_->Init(net_def, input_nodes, output_nodes, model_data);
} }
MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs,
...@@ -275,4 +284,68 @@ MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs, ...@@ -275,4 +284,68 @@ MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs,
return impl_->Run(inputs, outputs, nullptr); return impl_->Run(inputs, outputs, nullptr);
} }
const unsigned char *LoadModelData(const std::string &model_data_file,
const size_t &data_size) {
int fd = open(model_data_file.c_str(), O_RDONLY);
MACE_CHECK(fd >= 0, "Failed to open model data file ",
model_data_file, ", error code: ", errno);
const unsigned char *model_data = static_cast<const unsigned char *>(
mmap(nullptr, data_size, PROT_READ, MAP_PRIVATE, fd, 0));
MACE_CHECK(model_data != MAP_FAILED, "Failed to map model data file ",
model_data_file, ", error code: ", errno);
int ret = close(fd);
MACE_CHECK(ret == 0, "Failed to close model data file ",
model_data_file, ", error code: ", errno);
return model_data;
}
void UnloadModelData(const unsigned char *model_data,
const size_t &data_size) {
int ret = munmap(const_cast<unsigned char *>(model_data),
data_size);
MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno);
}
MaceStatus CreateMaceEngineFromProto(
const std::vector<unsigned char> &model_pb,
const std::string &model_data_file,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
const DeviceType device_type,
std::shared_ptr<MaceEngine> *engine) {
LOG(INFO) << "Create MaceEngine from model pb";
// load model
if (engine == nullptr) {
return MaceStatus::MACE_INVALID_ARGS;
}
std::shared_ptr<NetDef> net_def(new NetDef());
net_def->ParseFromArray(&model_pb[0], model_pb.size());
index_t model_data_size = 0;
for (auto &const_tensor : net_def->tensors()) {
model_data_size = std::max(
model_data_size,
static_cast<index_t>(const_tensor.offset() +
const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type())));
}
MaceStatus status;
const unsigned char *model_data = nullptr;
model_data = LoadModelData(model_data_file, model_data_size);
engine->reset(new mace::MaceEngine(device_type));
status = (*engine)->Init(
net_def.get(), input_nodes, output_nodes, model_data);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
UnloadModelData(model_data, model_data_size);
}
return status;
}
} // namespace mace } // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <numeric>
#include "mace/public/mace_types.h"
#include "mace/utils/logging.h"
namespace mace {
// ConstTensor: read-only view over a tensor's raw weight bytes. It does NOT
// own `data`; the caller must keep the backing buffer alive for the view's
// lifetime.
ConstTensor::ConstTensor(const std::string &name,
                         const unsigned char *data,
                         const std::vector<int64_t> &dims,
                         const DataType data_type,
                         uint32_t node_id)
    : name_(name),
      data_(data),
      // Element count = product of all dims (1 when dims is empty, i.e. a
      // scalar).
      data_size_(std::accumulate(
          dims.begin(), dims.end(), 1, std::multiplies<int64_t>())),
      dims_(dims.begin(), dims.end()),
      data_type_(data_type),
      node_id_(node_id) {}

// Overload taking the data type as a raw int (e.g. straight from a serialized
// enum value); it is cast to DataType without validation.
ConstTensor::ConstTensor(const std::string &name,
                         const unsigned char *data,
                         const std::vector<int64_t> &dims,
                         const int data_type,
                         uint32_t node_id)
    : name_(name),
      data_(data),
      data_size_(std::accumulate(
          dims.begin(), dims.end(), 1, std::multiplies<int64_t>())),
      dims_(dims.begin(), dims.end()),
      data_type_(static_cast<DataType>(data_type)),
      node_id_(node_id) {}

// Trivial read-only accessors. data_size() is the element count computed at
// construction, not a byte count.
const std::string &ConstTensor::name() const { return name_; }
const unsigned char *ConstTensor::data() const { return data_; }
int64_t ConstTensor::data_size() const { return data_size_; }
const std::vector<int64_t> &ConstTensor::dims() const { return dims_; }
DataType ConstTensor::data_type() const { return data_type_; }
uint32_t ConstTensor::node_id() const { return node_id_; }
Argument::Argument() : has_bits_(0) {}
void Argument::CopyFrom(const Argument &from) {
this->name_ = from.name();
this->f_ = from.f();
this->i_ = from.i();
this->s_ = from.s();
auto floats = from.floats();
this->floats_.resize(floats.size());
std::copy(floats.begin(), floats.end(), this->floats_.begin());
auto ints = from.ints();
this->ints_.resize(ints.size());
std::copy(ints.begin(), ints.end(), this->ints_.begin());
auto strings = from.floats();
this->strings_.resize(strings.size());
std::copy(floats.begin(), floats.end(), this->floats_.begin());
this->has_bits_ = from.has_bits_;
}
// `name` has no presence bit; it is always considered present.
const std::string &Argument::name() const { return name_; }
void Argument::set_name(const std::string &value) { name_ = value; }

// Scalar float field `f`, with proto-style presence tracking: bit 0 of
// has_bits_ records whether set_f() was ever called.
bool Argument::has_f() const { return (has_bits_ & 0x00000001u) != 0; }
void Argument::set_has_f() { has_bits_ |= 0x00000001u; }
float Argument::f() const { return f_; }
void Argument::set_f(float value) {
  set_has_f();
  f_ = value;
}

// Scalar int64 field `i` (presence bit 1).
bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; }
void Argument::set_has_i() { has_bits_ |= 0x00000002u; }
int64_t Argument::i() const { return i_; }
void Argument::set_i(int64_t value) {
  set_has_i();
  i_ = value;
}

// Scalar string field `s` (presence bit 2). Note: s() returns by value,
// unlike name(), which returns a const reference.
bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; }
void Argument::set_has_s() { has_bits_ |= 0x00000004u; }
std::string Argument::s() const { return s_; }
void Argument::set_s(const std::string &value) {
  set_has_s();
  s_ = value;
}
// Repeated float field: read, append one, or replace wholesale.
const std::vector<float> &Argument::floats() const { return floats_; }
void Argument::add_floats(float value) { floats_.push_back(value); }
void Argument::set_floats(const std::vector<float> &value) {
  floats_.assign(value.begin(), value.end());
}

// Repeated int64 field.
const std::vector<int64_t> &Argument::ints() const { return ints_; }
void Argument::add_ints(int64_t value) { ints_.push_back(value); }
void Argument::set_ints(const std::vector<int64_t> &value) {
  ints_.assign(value.begin(), value.end());
}

// Repeated string field.
const std::vector<std::string> &Argument::strings() const { return strings_; }
void Argument::add_strings(const ::std::string &value) {
  strings_.push_back(value);
}
void Argument::set_strings(const std::vector<std::string> &value) {
  strings_.assign(value.begin(), value.end());
}
// Node Input
NodeInput::NodeInput(int node_id, int output_port)
: node_id_(node_id), output_port_(output_port) {}
void NodeInput::CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
}
int NodeInput::node_id() const { return node_id_; }
void NodeInput::set_node_id(int node_id) { node_id_ = node_id; }
int NodeInput::output_port() const { return output_port_; }
void NodeInput::set_output_port(int output_port) { output_port_ = output_port; }
// OutputShape: a thin wrapper around an operator output's dimension list.
OutputShape::OutputShape() {}

OutputShape::OutputShape(const std::vector<int64_t> &shape)
    : dims_(shape.begin(), shape.end()) {}

// Replaces this shape's dims with a copy of `from`'s dims.
void OutputShape::CopyFrom(const OutputShape &from) {
  dims_.assign(from.dims().begin(), from.dims().end());
}

const std::vector<int64_t> &OutputShape::dims() const { return dims_; }
// Operator Def
// Deep-copies every field of `from`, including the Hexagon (nnlib) node
// metadata and the manual presence bits.
void OperatorDef::CopyFrom(const OperatorDef &from) {
  name_ = from.name();
  type_ = from.type();

  // Repeated string fields: input / output tensor names.
  auto from_input = from.input();
  input_.resize(from_input.size());
  std::copy(from_input.begin(), from_input.end(), input_.begin());
  auto from_output = from.output();
  output_.resize(from_output.size());
  std::copy(from_output.begin(), from_output.end(), output_.begin());

  // Arguments and output shapes are copied element-wise through their own
  // CopyFrom so nested state (e.g. Argument's has_bits_) is preserved.
  auto from_arg = from.arg();
  arg_.resize(from_arg.size());
  for (size_t i = 0; i < from_arg.size(); ++i) {
    arg_[i].CopyFrom(from_arg[i]);
  }
  auto from_output_shape = from.output_shape();
  output_shape_.resize(from_output_shape.size());
  for (size_t i = 0; i < from_output_shape.size(); ++i) {
    output_shape_[i].CopyFrom(from_output_shape[i]);
  }
  auto from_data_type = from.output_type();
  output_type_.resize(from_data_type.size());
  std::copy(from_data_type.begin(), from_data_type.end(), output_type_.begin());

  // Memory-planning ids.
  auto mem_ids = from.mem_id();
  mem_id_.resize(mem_ids.size());
  std::copy(mem_ids.begin(), mem_ids.end(), mem_id_.begin());

  // nnlib
  node_id_ = from.node_id();
  op_id_ = from.op_id();
  padding_ = from.padding();
  auto from_node_input = from.node_input();
  node_input_.resize(from_node_input.size());
  for (size_t i = 0; i < from_node_input.size(); ++i) {
    node_input_[i].CopyFrom(from_node_input[i]);
  }
  auto from_out_max_byte_size = from.out_max_byte_size();
  out_max_byte_size_.resize(from_out_max_byte_size.size());
  std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(),
            out_max_byte_size_.begin());

  // Presence bits copied verbatim (friend access to from.has_bits_).
  has_bits_ = from.has_bits_;
}
// `name` (presence bit 0). The setter's parameter deliberately shadows the
// member, hence the qualified OperatorDef::name_ on assignment.
const std::string &OperatorDef::name() const { return name_; }
void OperatorDef::set_name(const std::string &name_) {
  set_has_name();
  OperatorDef::name_ = name_;
}
bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; }
void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; }

// `type` (presence bit 1), same shadowing pattern as set_name().
const std::string &OperatorDef::type() const { return type_; }
void OperatorDef::set_type(const std::string &type_) {
  set_has_type();
  OperatorDef::type_ = type_;
}
bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; }
void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; }

// Memory-plan ids for this op's outputs.
const std::vector<int> &OperatorDef::mem_id() const { return mem_id_; }
void OperatorDef::set_mem_id(const std::vector<int> &value) {
  mem_id_.resize(value.size());
  std::copy(value.begin(), value.end(), mem_id_.begin());
}

// Hexagon (nnlib) node metadata. op_id and padding have a getter here but
// op_id has no setter in this file.
uint32_t OperatorDef::node_id() const { return node_id_; }
void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; }
uint32_t OperatorDef::op_id() const { return op_id_; }
uint32_t OperatorDef::padding() const { return padding_; }
void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; }
const std::vector<NodeInput> &OperatorDef::node_input() const {
  return node_input_;
}
void OperatorDef::add_node_input(const NodeInput &value) {
  node_input_.push_back(value);
}
const std::vector<int> &OperatorDef::out_max_byte_size() const {
  return out_max_byte_size_;
}
void OperatorDef::add_out_max_byte_size(int value) {
  out_max_byte_size_.push_back(value);
}
const std::vector<std::string> &OperatorDef::input() const { return input_; }

// Bounds-checked access to a single input tensor name.
const std::string &OperatorDef::input(int index) const {
  // Fix: the upper bound used to be `<=`, which let index == input_.size()
  // pass the check and read one past the end of the vector.
  MACE_CHECK(0 <= index && index < static_cast<int>(input_.size()));
  return input_[index];
}

// Appends an empty slot and returns a pointer for the caller to fill in
// (protobuf generated-code style mutator).
std::string *OperatorDef::add_input() {
  input_.push_back("");
  return &input_.back();
}
void OperatorDef::add_input(const ::std::string &value) {
  input_.push_back(value);
}
void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); }
void OperatorDef::set_input(const std::vector<std::string> &value) {
  input_.resize(value.size());
  std::copy(value.begin(), value.end(), input_.begin());
}
const std::vector<std::string> &OperatorDef::output() const { return output_; }

// Bounds-checked access to a single output tensor name.
const std::string &OperatorDef::output(int index) const {
  // Fix: the upper bound used to be `<=`, which let index == output_.size()
  // pass the check and read one past the end of the vector.
  MACE_CHECK(0 <= index && index < static_cast<int>(output_.size()));
  return output_[index];
}

// Appends an empty slot and returns a pointer for the caller to fill in
// (protobuf generated-code style mutator).
std::string *OperatorDef::add_output() {
  output_.push_back("");
  return &output_.back();
}
void OperatorDef::add_output(const ::std::string &value) {
  output_.push_back(value);
}
void OperatorDef::add_output(::std::string &&value) {
  output_.push_back(value);
}
void OperatorDef::set_output(const std::vector<std::string> &value) {
  output_.resize(value.size());
  std::copy(value.begin(), value.end(), output_.begin());
}
// Repeated Argument field.
const std::vector<Argument> &OperatorDef::arg() const { return arg_; }

// Appends a default-constructed Argument and hands back a pointer to it,
// mirroring the protobuf add_*() mutator style.
Argument *OperatorDef::add_arg() {
  arg_.emplace_back();
  return &arg_.back();
}

// Repeated OutputShape field.
const std::vector<OutputShape> &OperatorDef::output_shape() const {
  return output_shape_;
}
void OperatorDef::add_output_shape(const OutputShape &value) {
  output_shape_.push_back(value);
}

// Per-output data types.
const std::vector<DataType> &OperatorDef::output_type() const {
  return output_type_;
}
void OperatorDef::set_output_type(const std::vector<DataType> &value) {
  output_type_.assign(value.begin(), value.end());
}
// MemoryBlock
// Describes one pre-planned buffer: an id plus a 2-D (x, y) extent.
// NOTE(review): presumably the extent of an image-shaped buffer for the GPU
// memory planner — confirm against the allocator that consumes it.
MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y)
    : mem_id_(mem_id), x_(x), y_(y) {}
int MemoryBlock::mem_id() const { return mem_id_; }
uint32_t MemoryBlock::x() const { return x_; }
uint32_t MemoryBlock::y() const { return y_; }

// MemoryArena
// Flat collection of MemoryBlocks; mutable_mem_block() exposes the vector
// directly for the planner to fill in.
const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
  return mem_block_;
}
std::vector<MemoryBlock> &MemoryArena::mutable_mem_block() {
  return mem_block_;
}
int MemoryArena::mem_block_size() const { return mem_block_.size(); }
// InputInfo
// Read-only metadata describing one model input (name, graph node id,
// buffer size bound, element type, and shape).
const std::string &InputInfo::name() const { return name_; }
int32_t InputInfo::node_id() const { return node_id_; }
int32_t InputInfo::max_byte_size() const { return max_byte_size_; }
DataType InputInfo::data_type() const { return data_type_; }
const std::vector<int32_t> &InputInfo::dims() const { return dims_; }

// OutputInfo
// Same metadata for a model output; unlike InputInfo, the data type and
// dims are settable after construction.
const std::string &OutputInfo::name() const { return name_; }
int32_t OutputInfo::node_id() const { return node_id_; }
int32_t OutputInfo::max_byte_size() const { return max_byte_size_; }
DataType OutputInfo::data_type() const { return data_type_; }
void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; }
const std::vector<int32_t> &OutputInfo::dims() const { return dims_; }
void OutputInfo::set_dims(const std::vector<int32_t> &dims) { dims_ = dims; }
// NetDef
// Whole-graph container: operators, global arguments, constant tensors,
// the memory arena, and input/output metadata. Starts with all presence
// bits cleared.
NetDef::NetDef() : has_bits_(0) {}

// `name` (presence bit 0).
const std::string &NetDef::name() const { return name_; }
void NetDef::set_name(const std::string &value) {
  set_has_name();
  name_ = value;
}
bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; }
void NetDef::set_has_name() { has_bits_ |= 0x00000001u; }

// `version` (presence bit 1).
const std::string &NetDef::version() const { return version_; }
void NetDef::set_version(const std::string &value) {
  set_has_version();
  version_ = value;
}
bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; }
void NetDef::set_has_version() { has_bits_ |= 0x00000002u; }

// Repeated operator field; add_op() returns a pointer to the new slot in
// protobuf generated-code style.
const std::vector<OperatorDef> &NetDef::op() const { return op_; }
OperatorDef *NetDef::add_op() {
  op_.emplace_back(OperatorDef());
  return &op_.back();
}
std::vector<OperatorDef> &NetDef::mutable_op() { return op_; }

// Repeated graph-level arguments.
const std::vector<Argument> &NetDef::arg() const { return arg_; }
Argument *NetDef::add_arg() {
  arg_.emplace_back(Argument());
  return &arg_.back();
}
std::vector<Argument> &NetDef::mutable_arg() { return arg_; }

// Constant (weight) tensors.
const std::vector<ConstTensor> &NetDef::tensors() const { return tensors_; }
std::vector<ConstTensor> &NetDef::mutable_tensors() { return tensors_; }

// Memory arena (presence bit 2). Note: taking the mutable reference marks
// the arena as present as a side effect.
const MemoryArena &NetDef::mem_arena() const { return mem_arena_; }
MemoryArena &NetDef::mutable_mem_arena() {
  set_has_mem_arena();
  return mem_arena_;
}
bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; }
void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; }

// Input/output metadata; only output_info has a mutable accessor here.
const std::vector<InputInfo> &NetDef::input_info() const { return input_info_; }
const std::vector<OutputInfo> &NetDef::output_info() const {
  return output_info_;
}
std::vector<OutputInfo> &NetDef::mutable_output_info() { return output_info_; }

// Bounds-checked operator access (strict upper bound, unlike
// OperatorDef::input/output above).
int NetDef::op_size() const { return op_.size(); }
const OperatorDef &NetDef::op(const int idx) const {
  MACE_CHECK(0 <= idx && idx < op_size());
  return op_[idx];
}
}; // namespace mace
...@@ -110,7 +110,8 @@ bool SerialNet::Run(RunMetadata *run_metadata) { ...@@ -110,7 +110,8 @@ bool SerialNet::Run(RunMetadata *run_metadata) {
} }
OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(), OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(),
op->debug_def().output_shape(), {op->debug_def().output_shape().begin(),
op->debug_def().output_shape().end()},
{strides, padding_type, paddings, dilations, {strides, padding_type, paddings, dilations,
kernels}, call_stats}; kernels}, call_stats};
run_metadata->op_stats.emplace_back(op_stats); run_metadata->op_stats.emplace_back(op_stats);
......
...@@ -25,8 +25,8 @@ ...@@ -25,8 +25,8 @@
#include "mace/core/registry.h" #include "mace/core/registry.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_types.h"
namespace mace { namespace mace {
...@@ -108,20 +108,20 @@ class Operator : public OperatorBase { ...@@ -108,20 +108,20 @@ class Operator : public OperatorBase {
inputs_.push_back(tensor); inputs_.push_back(tensor);
} }
for (size_t i = 0; i < operator_def.output().size(); ++i) { for (int i = 0; i < operator_def.output_size(); ++i) {
const std::string output_str = operator_def.output()[i]; const std::string output_str = operator_def.output(i);
if (ws->HasTensor(output_str)) { if (ws->HasTensor(output_str)) {
outputs_.push_back(ws->GetTensor(output_str)); outputs_.push_back(ws->GetTensor(output_str));
} else { } else {
MACE_CHECK( MACE_CHECK(
operator_def.output_type().size() == 0 operator_def.output_type_size() == 0
|| operator_def.output().size() == operator_def.output_type().size(), || operator_def.output_size() == operator_def.output_type_size(),
"operator output size != operator output type size", "operator output size != operator output type size",
operator_def.output().size(), operator_def.output_size(),
operator_def.output_type().size()); operator_def.output_type_size());
DataType output_type; DataType output_type;
if (i < operator_def.output_type().size()) { if (i < operator_def.output_type_size()) {
output_type = operator_def.output_type()[i]; output_type = operator_def.output_type(i);
} else { } else {
output_type = DataTypeToEnum<T>::v(); output_type = DataTypeToEnum<T>::v();
} }
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "mace/core/macros.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -135,6 +136,7 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, ...@@ -135,6 +136,7 @@ void SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
<< ", CPU core IDs: " << MakeString(cpu_ids); << ", CPU core IDs: " << MakeString(cpu_ids);
omp_set_num_threads(omp_num_threads); omp_set_num_threads(omp_num_threads);
#else #else
MACE_UNUSED(omp_num_threads);
LOG(WARNING) << "Set OpenMP threads number failed: OpenMP not enabled."; LOG(WARNING) << "Set OpenMP threads number failed: OpenMP not enabled.";
#endif #endif
......
...@@ -68,7 +68,8 @@ bool HexagonControlWrapper::Finalize() { ...@@ -68,7 +68,8 @@ bool HexagonControlWrapper::Finalize() {
return hexagon_controller_DeInitHexagon() == 0; return hexagon_controller_DeInitHexagon() == 0;
} }
bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
unsigned const char *model_data) {
LOG(INFO) << "Hexagon setup graph"; LOG(INFO) << "Hexagon setup graph";
int64_t t0 = NowMicros(); int64_t t0 = NowMicros();
...@@ -96,7 +97,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { ...@@ -96,7 +97,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
const_node.tensor.dataLen = 0; const_node.tensor.dataLen = 0;
} else { } else {
const_node.tensor.data = const_node.tensor.data =
const_cast<unsigned char *>(const_tensor.data()); const_cast<unsigned char *>(model_data + const_tensor.offset());
const_node.tensor.dataLen = const_tensor.data_size() * const_node.tensor.dataLen = const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type()); GetEnumTypeSize(const_tensor.data_type());
} }
...@@ -133,12 +134,12 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) { ...@@ -133,12 +134,12 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
for (const OperatorDef &op : net_def.op()) { for (const OperatorDef &op : net_def.op()) {
int op_id = op_map.GetOpId(op.type()); int op_id = op_map.GetOpId(op.type());
inputs.resize(op.node_input().size()); inputs.resize(op.node_input().size());
for (size_t i = 0; i < op.node_input().size(); ++i) { for (int i = 0; i < op.node_input().size(); ++i) {
inputs[i].src_id = node_id(op.node_input()[i].node_id()); inputs[i].src_id = node_id(op.node_input()[i].node_id());
inputs[i].output_idx = op.node_input()[i].output_port(); inputs[i].output_idx = op.node_input()[i].output_port();
} }
outputs.resize(op.out_max_byte_size().size()); outputs.resize(op.out_max_byte_size().size());
for (size_t i = 0; i < op.out_max_byte_size().size(); ++i) { for (int i = 0; i < op.out_max_byte_size().size(); ++i) {
outputs[i].max_size = op.out_max_byte_size()[i]; outputs[i].max_size = op.out_max_byte_size()[i];
} }
cached_inputs.push_back(inputs); cached_inputs.push_back(inputs);
......
...@@ -31,7 +31,7 @@ class HexagonControlWrapper { ...@@ -31,7 +31,7 @@ class HexagonControlWrapper {
bool Config(); bool Config();
bool Init(); bool Init();
bool Finalize(); bool Finalize();
bool SetupGraph(const NetDef &net_def); bool SetupGraph(const NetDef &net_def, const unsigned char *model_data);
bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor); bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor);
bool ExecuteGraphNew(const std::vector<Tensor> &input_tensors, bool ExecuteGraphNew(const std::vector<Tensor> &input_tensors,
std::vector<Tensor> *output_tensors); std::vector<Tensor> *output_tensors);
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include <cstdint> #include <cstdint>
#include <string> #include <string>
#include "mace/public/mace_types.h" #include "mace/proto/mace.pb.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "include/half.hpp" #include "include/half.hpp"
#endif #endif
......
...@@ -60,24 +60,17 @@ std::vector<std::string> Workspace::Tensors() const { ...@@ -60,24 +60,17 @@ std::vector<std::string> Workspace::Tensors() const {
return names; return names;
} }
MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
DeviceType type,
const unsigned char *model_data) {
MACE_LATENCY_LOGGER(1, "Load model tensors"); MACE_LATENCY_LOGGER(1, "Load model tensors");
index_t model_data_size = 0; index_t model_data_size = 0;
unsigned char *model_data_ptr = nullptr;
for (auto &const_tensor : net_def.tensors()) {
if (model_data_ptr == nullptr ||
reinterpret_cast<int64_t>(const_tensor.data()) <
reinterpret_cast<int64_t>(model_data_ptr)) {
model_data_ptr = const_cast<unsigned char *>(const_tensor.data());
}
}
for (auto &const_tensor : net_def.tensors()) { for (auto &const_tensor : net_def.tensors()) {
model_data_size = std::max( model_data_size = std::max(
model_data_size, model_data_size,
static_cast<index_t>((reinterpret_cast<int64_t>(const_tensor.data()) - static_cast<index_t>(const_tensor.offset() +
reinterpret_cast<int64_t>(model_data_ptr)) +
const_tensor.data_size() * const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type()))); GetEnumTypeSize(const_tensor.data_type())));
} }
VLOG(3) << "Model data size: " << model_data_size; VLOG(3) << "Model data size: " << model_data_size;
...@@ -85,7 +78,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -85,7 +78,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
if (type == DeviceType::CPU) { if (type == DeviceType::CPU) {
tensor_buffer_ = std::unique_ptr<Buffer>( tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), new Buffer(GetDeviceAllocator(type),
model_data_ptr, const_cast<unsigned char*>(model_data),
model_data_size)); model_data_size));
} else { } else {
tensor_buffer_ = std::unique_ptr<Buffer>( tensor_buffer_ = std::unique_ptr<Buffer>(
...@@ -95,7 +88,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -95,7 +88,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
return status; return status;
} }
tensor_buffer_->Map(nullptr); tensor_buffer_->Map(nullptr);
tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->Copy(const_cast<unsigned char*>(model_data),
0, model_data_size);
tensor_buffer_->UnMap(); tensor_buffer_->UnMap();
} }
} }
...@@ -111,10 +105,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -111,10 +105,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
dims.push_back(d); dims.push_back(d);
} }
index_t offset = reinterpret_cast<int64_t>(const_tensor.data())
- reinterpret_cast<int64_t>(model_data_ptr);
std::unique_ptr<Tensor> tensor( std::unique_ptr<Tensor> tensor(
new Tensor(BufferSlice(tensor_buffer_.get(), offset, new Tensor(BufferSlice(tensor_buffer_.get(), const_tensor.offset(),
const_tensor.data_size() * const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type())), GetEnumTypeSize(const_tensor.data_type())),
const_tensor.data_type())); const_tensor.data_type()));
......
...@@ -47,7 +47,9 @@ class Workspace { ...@@ -47,7 +47,9 @@ class Workspace {
std::vector<std::string> Tensors() const; std::vector<std::string> Tensors() const;
MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type); MaceStatus LoadModelTensor(const NetDef &net_def,
DeviceType type,
const unsigned char *model_data);
ScratchBuffer *GetScratchBuffer(DeviceType device_type); ScratchBuffer *GetScratchBuffer(DeviceType device_type);
......
...@@ -6,6 +6,7 @@ cc_binary( ...@@ -6,6 +6,7 @@ cc_binary(
srcs = ["example.cc"], srcs = ["example.cc"],
linkopts = if_openmp_enabled(["-fopenmp"]), linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1, linkstatic = 1,
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_models", "//mace/codegen:generated_models",
......
...@@ -120,6 +120,9 @@ DEFINE_string(output_file, ...@@ -120,6 +120,9 @@ DEFINE_string(output_file,
DEFINE_string(model_data_file, DEFINE_string(model_data_file,
"", "",
"model data file name, used when EMBED_MODEL_DATA set to 0"); "model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(model_file,
"",
"model file name, used when load mace model in pb");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
...@@ -163,23 +166,31 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -163,23 +166,31 @@ bool RunModel(const std::vector<std::string> &input_names,
std::shared_ptr<mace::MaceEngine> engine; std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status; MaceStatus create_engine_status;
// Create Engine // Create Engine
if (FLAGS_model_data_file.empty()) { MaceStatus create_engine_status;
// Create Engine
int64_t t0 = NowMicros();
if (FLAGS_model_file != "") {
std::vector<unsigned char> model_pb_data;
if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
}
create_engine_status = create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(), CreateMaceEngineFromProto(model_pb_data,
nullptr, FLAGS_model_data_file,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} else { } else {
create_engine_status = create_engine_status =
CreateMaceEngine(FLAGS_model_name.c_str(), CreateMaceEngineFromCode(model_name,
FLAGS_model_data_file.c_str(), FLAGS_model_data_file,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} }
if (create_engine_status != MaceStatus::MACE_SUCCESS) { if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments"; LOG(FATAL) << "Create engine error, please check the arguments";
} }
...@@ -258,6 +269,7 @@ int Main(int argc, char **argv) { ...@@ -258,6 +269,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "input_file: " << FLAGS_input_file; LOG(INFO) << "input_file: " << FLAGS_input_file;
LOG(INFO) << "output_file: " << FLAGS_output_file; LOG(INFO) << "output_file: " << FLAGS_output_file;
LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; LOG(INFO) << "model_data_file: " << FLAGS_model_data_file;
LOG(INFO) << "model_file: " << FLAGS_model_file;
LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "device: " << FLAGS_device;
LOG(INFO) << "round: " << FLAGS_round; LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_restart_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round;
......
...@@ -38,7 +38,8 @@ cc_library( ...@@ -38,7 +38,8 @@ cc_library(
"opencl/*.h", "opencl/*.h",
"buffer_to_image.h", "buffer_to_image.h",
])), ])),
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
...@@ -61,7 +62,8 @@ cc_test( ...@@ -61,7 +62,8 @@ cc_test(
"opencl/*_test.cc", "opencl/*_test.cc",
], ],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
......
...@@ -15,6 +15,7 @@ cc_library( ...@@ -15,6 +15,7 @@ cc_library(
hdrs = [ hdrs = [
"ops_test_util.h", "ops_test_util.h",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//mace/core", "//mace/core",
"@gtest//:gtest", "@gtest//:gtest",
...@@ -40,7 +41,8 @@ cc_library( ...@@ -40,7 +41,8 @@ cc_library(
["*.h"], ["*.h"],
exclude = ["ops_test_util.h"], exclude = ["ops_test_util.h"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
...@@ -58,7 +60,8 @@ cc_test( ...@@ -58,7 +60,8 @@ cc_test(
srcs = glob( srcs = glob(
["*_test.cc"], ["*_test.cc"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
...@@ -77,7 +80,8 @@ cc_test( ...@@ -77,7 +80,8 @@ cc_test(
name = "ops_benchmark", name = "ops_benchmark",
testonly = 1, testonly = 1,
srcs = glob(["*_benchmark.cc"]), srcs = glob(["*_benchmark.cc"]),
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
......
...@@ -53,7 +53,9 @@ class OpDefBuilder { ...@@ -53,7 +53,9 @@ class OpDefBuilder {
} }
OpDefBuilder &OutputType(const std::vector<DataType> &output_type) { OpDefBuilder &OutputType(const std::vector<DataType> &output_type) {
op_def_.set_output_type(output_type); for (auto out_t : output_type) {
op_def_.add_output_type(out_t);
}
return *this; return *this;
} }
......
...@@ -8,7 +8,9 @@ package( ...@@ -8,7 +8,9 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") load("@com_google_protobuf//:protobuf.bzl",
"py_proto_library",
"cc_proto_library")
py_proto_library( py_proto_library(
name = "mace_py", name = "mace_py",
...@@ -19,11 +21,9 @@ py_proto_library( ...@@ -19,11 +21,9 @@ py_proto_library(
deps = ["@com_google_protobuf//:protobuf_python"], deps = ["@com_google_protobuf//:protobuf_python"],
) )
py_proto_library( cc_proto_library(
name = "caffe_py", name = "mace_cc",
srcs = ["caffe.proto"], srcs = ["mace.proto"],
default_runtime = "@com_google_protobuf//:protobuf_python", default_runtime = "@com_google_protobuf//:protobuf_lite",
protoc = "@com_google_protobuf//:protoc", protoc = "@com_google_protobuf//:protoc",
srcs_version = "PY2AND3",
deps = ["@com_google_protobuf//:protobuf_python"],
) )
...@@ -2,17 +2,13 @@ syntax = "proto2"; ...@@ -2,17 +2,13 @@ syntax = "proto2";
package mace; package mace;
option optimize_for = LITE_RUNTIME;
enum NetMode { enum NetMode {
INIT = 0; INIT = 0;
NORMAL = 1; NORMAL = 1;
} }
enum DeviceType {
CPU = 0; // In default, we will use CPU.
GPU = 2;
HEXAGON = 3;
}
enum DataType { enum DataType {
DT_INVALID = 0; DT_INVALID = 0;
...@@ -32,7 +28,7 @@ enum DataType { ...@@ -32,7 +28,7 @@ enum DataType {
DT_UINT32 = 22; DT_UINT32 = 22;
} }
message TensorProto { message ConstTensor {
// The dimensions in the tensor. // The dimensions in the tensor.
repeated int64 dims = 1; repeated int64 dims = 1;
optional DataType data_type = 2 [default = DT_FLOAT]; optional DataType data_type = 2 [default = DT_FLOAT];
...@@ -52,6 +48,8 @@ message TensorProto { ...@@ -52,6 +48,8 @@ message TensorProto {
repeated int64 int64_data = 10 [packed = true]; repeated int64 int64_data = 10 [packed = true];
// Optionally, a name for the tensor. // Optionally, a name for the tensor.
optional string name = 7; optional string name = 7;
optional int64 offset = 11;
optional int64 data_size = 12;
optional uint32 node_id = 100; optional uint32 node_id = 100;
} }
...@@ -126,7 +124,7 @@ message NetDef { ...@@ -126,7 +124,7 @@ message NetDef {
repeated OperatorDef op = 2; repeated OperatorDef op = 2;
optional string version = 3; optional string version = 3;
repeated Argument arg = 4; repeated Argument arg = 4;
repeated TensorProto tensors = 5; repeated ConstTensor tensors = 5;
// for mem optimization // for mem optimization
optional MemoryArena mem_arena = 10; optional MemoryArena mem_arena = 10;
......
...@@ -12,6 +12,6 @@ cc_library( ...@@ -12,6 +12,6 @@ cc_library(
hdrs = [ hdrs = [
"mace.h", "mace.h",
"mace_runtime.h", "mace_runtime.h",
"mace_types.h",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
) )
...@@ -26,10 +26,39 @@ ...@@ -26,10 +26,39 @@
namespace mace { namespace mace {
const char *MaceVersion(); class OutputShape;
class NetDef;
enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 };
struct CallStats {
int64_t start_micros;
int64_t end_micros;
};
struct ConvPoolArgs {
std::vector<int> strides;
int padding_type;
std::vector<int> paddings;
std::vector<int> dilations;
std::vector<int64_t> kernels;
};
struct OperatorStats {
std::string operator_name;
std::string type;
std::vector<OutputShape> output_shape;
ConvPoolArgs args;
CallStats stats;
};
class RunMetadata {
public:
std::vector<OperatorStats> op_stats;
};
const char *MaceVersion();
enum MaceStatus { enum MaceStatus {
MACE_SUCCESS = 0, MACE_SUCCESS = 0,
MACE_INVALID_ARGS = 1, MACE_INVALID_ARGS = 1,
...@@ -60,9 +89,6 @@ class MaceTensor { ...@@ -60,9 +89,6 @@ class MaceTensor {
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
}; };
class NetDef;
class RunMetadata;
class MaceEngine { class MaceEngine {
public: public:
explicit MaceEngine(DeviceType device_type); explicit MaceEngine(DeviceType device_type);
...@@ -70,7 +96,8 @@ class MaceEngine { ...@@ -70,7 +96,8 @@ class MaceEngine {
MaceStatus Init(const NetDef *net_def, MaceStatus Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes); const std::vector<std::string> &output_nodes,
const unsigned char *model_data);
MaceStatus Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
std::map<std::string, MaceTensor> *outputs); std::map<std::string, MaceTensor> *outputs);
...@@ -87,6 +114,14 @@ class MaceEngine { ...@@ -87,6 +114,14 @@ class MaceEngine {
MaceEngine &operator=(const MaceEngine &) = delete; MaceEngine &operator=(const MaceEngine &) = delete;
}; };
MaceStatus CreateMaceEngineFromProto(
const std::vector<unsigned char> &model_pb,
const std::string &model_data_file,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
const DeviceType device_type,
std::shared_ptr<MaceEngine> *engine);
} // namespace mace } // namespace mace
#endif // MACE_PUBLIC_MACE_H_ #endif // MACE_PUBLIC_MACE_H_
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file defines data types used by net creation and benchmark tools.
// These APIs are not stable and should only be used by advanced users.
#ifndef MACE_PUBLIC_MACE_TYPES_H_
#define MACE_PUBLIC_MACE_TYPES_H_
#include <string>
#include <vector>
namespace mace {
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname &) = delete; \
classname &operator=(const classname &) = delete
#endif
enum NetMode { INIT = 0, NORMAL = 1 };
enum DataType {
DT_INVALID = 0,
DT_FLOAT = 1,
DT_DOUBLE = 2,
DT_INT32 = 3,
DT_UINT8 = 4,
DT_INT16 = 5,
DT_INT8 = 6,
DT_STRING = 7,
DT_INT64 = 8,
DT_UINT16 = 9,
DT_BOOL = 10,
DT_HALF = 19,
DT_UINT32 = 22
};
class ConstTensor {
public:
ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const DataType data_type = DT_FLOAT,
uint32_t node_id = 0);
ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const int data_type,
uint32_t node_id = 0);
const std::string &name() const;
const unsigned char *data() const;
int64_t data_size() const;
const std::vector<int64_t> &dims() const;
DataType data_type() const;
uint32_t node_id() const;
private:
const std::string name_;
const unsigned char *data_;
const int64_t data_size_;
const std::vector<int64_t> dims_;
const DataType data_type_;
const uint32_t node_id_;
};
class Argument {
public:
Argument();
void CopyFrom(const Argument &from);
public:
const std::string &name() const;
void set_name(const std::string &value);
bool has_f() const;
float f() const;
void set_f(float value);
bool has_i() const;
int64_t i() const;
void set_i(int64_t value);
bool has_s() const;
std::string s() const;
void set_s(const std::string &value);
const std::vector<float> &floats() const;
void add_floats(float value);
void set_floats(const std::vector<float> &value);
const std::vector<int64_t> &ints() const;
void add_ints(int64_t value);
void set_ints(const std::vector<int64_t> &value);
const std::vector<std::string> &strings() const;
void add_strings(const ::std::string &value);
void set_strings(const std::vector<std::string> &value);
private:
void set_has_f();
void set_has_i();
void set_has_s();
private:
std::string name_;
float f_;
int64_t i_;
std::string s_;
std::vector<float> floats_;
std::vector<int64_t> ints_;
std::vector<std::string> strings_;
uint32_t has_bits_;
};
class NodeInput {
public:
NodeInput() {}
NodeInput(int node_id, int output_port);
void CopyFrom(const NodeInput &from);
public:
int node_id() const;
void set_node_id(int node_id);
int output_port() const;
void set_output_port(int output_port);
private:
int node_id_;
int output_port_;
};
class OutputShape {
public:
OutputShape();
OutputShape(const std::vector<int64_t> &dims); // NOLINT(runtime/explicit)
void CopyFrom(const OutputShape &from);
public:
const std::vector<int64_t> &dims() const;
private:
std::vector<int64_t> dims_;
};
class OperatorDef {
public:
void CopyFrom(const OperatorDef &from);
public:
const std::string &name() const;
void set_name(const std::string &name_);
bool has_name() const;
const std::string &type() const;
void set_type(const std::string &type_);
bool has_type() const;
const std::vector<int> &mem_id() const;
void set_mem_id(const std::vector<int> &value);
uint32_t node_id() const;
void set_node_id(uint32_t node_id);
uint32_t op_id() const;
uint32_t padding() const;
void set_padding(uint32_t padding);
const std::vector<NodeInput> &node_input() const;
void add_node_input(const NodeInput &value);
const std::vector<int> &out_max_byte_size() const;
void add_out_max_byte_size(int value);
const std::vector<std::string> &input() const;
const std::string &input(int index) const;
std::string *add_input();
void add_input(const ::std::string &value);
void add_input(::std::string &&value);
void set_input(const std::vector<std::string> &value);
const std::vector<std::string> &output() const;
const std::string &output(int index) const;
std::string *add_output();
void add_output(const ::std::string &value);
void add_output(::std::string &&value);
void set_output(const std::vector<std::string> &value);
const std::vector<Argument> &arg() const;
Argument *add_arg();
const std::vector<OutputShape> &output_shape() const;
void add_output_shape(const OutputShape &value);
const std::vector<DataType> &output_type() const;
void set_output_type(const std::vector<DataType> &value);
private:
void set_has_name();
void set_has_type();
void set_has_mem_id();
private:
std::string name_;
std::string type_;
std::vector<std::string> input_;
std::vector<std::string> output_;
std::vector<Argument> arg_;
std::vector<OutputShape> output_shape_;
std::vector<DataType> output_type_;
std::vector<int> mem_id_;
// nnlib
uint32_t node_id_;
uint32_t op_id_;
uint32_t padding_;
std::vector<NodeInput> node_input_;
std::vector<int> out_max_byte_size_;
uint32_t has_bits_;
};
class MemoryBlock {
public:
MemoryBlock(int mem_id, uint32_t x, uint32_t y);
public:
int mem_id() const;
uint32_t x() const;
uint32_t y() const;
private:
int mem_id_;
uint32_t x_;
uint32_t y_;
};
class MemoryArena {
public:
const std::vector<MemoryBlock> &mem_block() const;
std::vector<MemoryBlock> &mutable_mem_block();
int mem_block_size() const;
private:
std::vector<MemoryBlock> mem_block_;
};
// for hexagon mace-nnlib
class InputInfo {
public:
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
const std::vector<int32_t> &dims() const;
private:
std::string name_;
int32_t node_id_;
int32_t max_byte_size_; // only support 32-bit len
DataType data_type_;
std::vector<int32_t> dims_;
};
class OutputInfo {
public:
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
void set_data_type(DataType data_type);
const std::vector<int32_t> &dims() const;
void set_dims(const std::vector<int32_t> &dims);
private:
std::string name_;
int32_t node_id_;
int32_t max_byte_size_; // only support 32-bit len
DataType data_type_;
std::vector<int32_t> dims_;
};
class NetDef {
public:
NetDef();
int op_size() const;
const OperatorDef &op(const int idx) const;
public:
const std::string &name() const;
bool has_name() const;
void set_name(const std::string &value);
const std::string &version() const;
bool has_version() const;
void set_version(const std::string &value);
const std::vector<OperatorDef> &op() const;
OperatorDef *add_op();
std::vector<OperatorDef> &mutable_op();
const std::vector<Argument> &arg() const;
Argument *add_arg();
std::vector<Argument> &mutable_arg();
const std::vector<ConstTensor> &tensors() const;
std::vector<ConstTensor> &mutable_tensors();
const MemoryArena &mem_arena() const;
bool has_mem_arena() const;
MemoryArena &mutable_mem_arena();
const std::vector<InputInfo> &input_info() const;
const std::vector<OutputInfo> &output_info() const;
std::vector<OutputInfo> &mutable_output_info();
private:
void set_has_name();
void set_has_version();
void set_has_mem_arena();
private:
std::string name_;
std::string version_;
std::vector<OperatorDef> op_;
std::vector<Argument> arg_;
std::vector<ConstTensor> tensors_;
// for mem optimization
MemoryArena mem_arena_;
// for hexagon mace-nnlib
std::vector<InputInfo> input_info_;
std::vector<OutputInfo> output_info_;
uint32_t has_bits_;
};
struct CallStats {
int64_t start_micros;
int64_t end_micros;
};
struct ConvPoolArgs {
std::vector<int> strides;
int padding_type;
std::vector<int> paddings;
std::vector<int> dilations;
std::vector<int64_t> kernels;
};
struct OperatorStats {
std::string operator_name;
std::string type;
std::vector<OutputShape> output_shape;
ConvPoolArgs args;
CallStats stats;
};
class RunMetadata {
public:
std::vector<OperatorStats> op_stats;
};
} // namespace mace
#endif // MACE_PUBLIC_MACE_TYPES_H_
...@@ -3,6 +3,7 @@ py_library( ...@@ -3,6 +3,7 @@ py_library(
srcs = [ srcs = [
"convert_util.py", "convert_util.py",
"graph_util.py", "graph_util.py",
"tensor_util.py",
"tf_dsp_converter_lib.py", "tf_dsp_converter_lib.py",
"converter_tool/base_converter.py", "converter_tool/base_converter.py",
"converter_tool/shape_inference.py", "converter_tool/shape_inference.py",
......
...@@ -22,6 +22,7 @@ from mace.proto import mace_pb2 ...@@ -22,6 +22,7 @@ from mace.proto import mace_pb2
from mace.python.tools import tf_dsp_converter_lib from mace.python.tools import tf_dsp_converter_lib
from mace.python.tools import memory_optimizer from mace.python.tools import memory_optimizer
from mace.python.tools import source_converter_lib from mace.python.tools import source_converter_lib
from mace.python.tools import tensor_util
from mace.python.tools.converter_tool import base_converter as cvt from mace.python.tools.converter_tool import base_converter as cvt
from mace.python.tools.converter_tool import tensorflow_converter from mace.python.tools.converter_tool import tensorflow_converter
from mace.python.tools.converter_tool import caffe_converter from mace.python.tools.converter_tool import caffe_converter
...@@ -36,9 +37,9 @@ from mace.python.tools.convert_util import mace_check ...@@ -36,9 +37,9 @@ from mace.python.tools.convert_util import mace_check
FLAGS = None FLAGS = None
device_type_map = {'cpu': mace_pb2.CPU, device_type_map = {'cpu': cvt.DeviceType.CPU.value,
'gpu': mace_pb2.GPU, 'gpu': cvt.DeviceType.GPU.value,
'dsp': mace_pb2.HEXAGON} 'dsp': cvt.DeviceType.HEXAGON.value}
def file_checksum(fname): def file_checksum(fname):
...@@ -129,16 +130,16 @@ def main(unused_args): ...@@ -129,16 +130,16 @@ def main(unused_args):
else: else:
gpu_data_type = mace_pb2.DT_FLOAT gpu_data_type = mace_pb2.DT_FLOAT
device_data_type_map = { device_data_type_map = {
mace_pb2.CPU: mace_pb2.DT_FLOAT, cvt.DeviceType.CPU.value: mace_pb2.DT_FLOAT,
mace_pb2.GPU: gpu_data_type, cvt.DeviceType.GPU.value: gpu_data_type,
mace_pb2.HEXAGON: mace_pb2.DT_UINT8 cvt.DeviceType.HEXAGON.value: mace_pb2.DT_UINT8
} }
print("Transform model to one that can better run on device") print("Transform model to one that can better run on device")
if not FLAGS.runtime: if not FLAGS.runtime:
cpu_graph_def = copy.deepcopy(output_graph_def) cpu_graph_def = copy.deepcopy(output_graph_def)
option.device = mace_pb2.CPU option.device = cvt.DeviceType.CPU.value
option.data_type = device_data_type_map[mace_pb2.CPU] option.data_type = device_data_type_map[cvt.DeviceType.CPU.value]
option.disable_transpose_filters() option.disable_transpose_filters()
mace_cpu_transformer = transformer.Transformer( mace_cpu_transformer = transformer.Transformer(
option, cpu_graph_def) option, cpu_graph_def)
...@@ -147,8 +148,8 @@ def main(unused_args): ...@@ -147,8 +148,8 @@ def main(unused_args):
memory_optimizer.optimize_cpu_memory(cpu_graph_def) memory_optimizer.optimize_cpu_memory(cpu_graph_def)
print "CPU memory optimization done." print "CPU memory optimization done."
option.device = mace_pb2.GPU option.device = cvt.DeviceType.GPU.value
option.data_type = device_data_type_map[mace_pb2.GPU] option.data_type = device_data_type_map[cvt.DeviceType.GPU.value]
option.enable_transpose_filters() option.enable_transpose_filters()
mace_gpu_transformer = transformer.Transformer( mace_gpu_transformer = transformer.Transformer(
option, output_graph_def) option, output_graph_def)
...@@ -179,18 +180,35 @@ def main(unused_args): ...@@ -179,18 +180,35 @@ def main(unused_args):
print "Memory optimization done." print "Memory optimization done."
if FLAGS.output_type == 'source': if FLAGS.obfuscate:
source_converter_lib.convert_to_source( tensor_util.obfuscate_name(output_graph_def)
output_graph_def, model_checksum, weight_checksum, FLAGS.template,
FLAGS.obfuscate, FLAGS.model_tag, FLAGS.output, FLAGS.runtime,
FLAGS.embed_model_data, FLAGS.winograd,
FLAGS.gpu_data_type)
else: else:
with open(FLAGS.output, "wb") as f: tensor_util.rename_tensor(output_graph_def)
tensor_infos, model_data = tensor_util.get_tensor_info_and_model_data(
output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type)
source_converter_lib.convert_to_source(
output_graph_def, model_checksum, weight_checksum, FLAGS.template,
FLAGS.obfuscate, FLAGS.model_tag, FLAGS.codegen_output,
FLAGS.runtime, FLAGS.embed_model_data, FLAGS.winograd,
FLAGS.model_load_type, tensor_infos, model_data)
if not FLAGS.embed_model_data:
output_dir = os.path.dirname(FLAGS.codegen_output) + '/'
with open(output_dir + FLAGS.model_tag + '.data', "wb") as f:
f.write(bytearray(model_data))
if FLAGS.model_load_type == 'pb':
tensor_util.del_tensor_data(
output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type)
tensor_util.update_tensor_data_type(
output_graph_def, FLAGS.runtime, FLAGS.gpu_data_type)
with open(FLAGS.pb_output, "wb") as f:
f.write(output_graph_def.SerializeToString()) f.write(output_graph_def.SerializeToString())
with open(FLAGS.output + '_txt', "wb") as f: # with open(FLAGS.pb_output + '_txt', "wb") as f:
# output_graph_def.ClearField('tensors') # # output_graph_def.ClearField('tensors')
f.write(str(output_graph_def)) # f.write(str(output_graph_def))
print("Model conversion is completed.") print("Model conversion is completed.")
...@@ -226,10 +244,15 @@ def parse_args(): ...@@ -226,10 +244,15 @@ def parse_args():
default="", default="",
help="Weight file sha256 checksum") help="Weight file sha256 checksum")
parser.add_argument( parser.add_argument(
"--output", "--codegen_output",
type=str, type=str,
default="", default="",
help="File to save the output graph to.") help="File to save the output graph to.")
parser.add_argument(
"--pb_output",
type=str,
default="",
help="File to save the mace model to.")
parser.add_argument( parser.add_argument(
"--runtime", type=str, default="", help="Runtime: cpu/gpu/dsp") "--runtime", type=str, default="", help="Runtime: cpu/gpu/dsp")
parser.add_argument( parser.add_argument(
...@@ -239,8 +262,6 @@ def parse_args(): ...@@ -239,8 +262,6 @@ def parse_args():
help="e.g., input_node") help="e.g., input_node")
parser.add_argument( parser.add_argument(
"--output_node", type=str, default="softmax", help="e.g., softmax") "--output_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--output_type", type=str, default="pb", help="output type: source/pb")
parser.add_argument( parser.add_argument(
"--template", type=str, default="", help="template path") "--template", type=str, default="", help="template path")
parser.add_argument( parser.add_argument(
...@@ -273,6 +294,12 @@ def parse_args(): ...@@ -273,6 +294,12 @@ def parse_args():
type=str2bool, type=str2bool,
default=True, default=True,
help="embed model data.") help="embed model data.")
parser.add_argument(
"--model_load_type",
type=str,
default="source",
help="[source|pb] Load models in generated `source` code" +
"or `pb` file.")
parser.add_argument( parser.add_argument(
"--gpu_data_type", type=str, default="half", help="half/float") "--gpu_data_type", type=str, default="half", help="half/float")
return parser.parse_known_args() return parser.parse_known_args()
......
...@@ -18,6 +18,12 @@ from enum import Enum ...@@ -18,6 +18,12 @@ from enum import Enum
from mace.proto import mace_pb2 from mace.proto import mace_pb2
class DeviceType(Enum):
CPU = 0
GPU = 2
HEXAGON = 3
class DataFormat(Enum): class DataFormat(Enum):
NHWC = 0 NHWC = 0
NCHW = 1 NCHW = 1
...@@ -199,7 +205,7 @@ class ConverterOption(object): ...@@ -199,7 +205,7 @@ class ConverterOption(object):
self._input_nodes = {} self._input_nodes = {}
self._output_nodes = {} self._output_nodes = {}
self._data_type = mace_pb2.DT_FLOAT self._data_type = mace_pb2.DT_FLOAT
self._device = mace_pb2.CPU self._device = DeviceType.CPU.value
self._winograd_enabled = False self._winograd_enabled = False
self._transformer_option = [ self._transformer_option = [
TransformerRule.REMOVE_USELESS_RESHAPE_OP, TransformerRule.REMOVE_USELESS_RESHAPE_OP,
......
...@@ -18,14 +18,15 @@ import numpy as np ...@@ -18,14 +18,15 @@ import numpy as np
from mace.proto import mace_pb2 from mace.proto import mace_pb2
from mace.python.tools.converter_tool import base_converter from mace.python.tools.converter_tool import base_converter
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import ActivationType from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import PaddingMode from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import DataFormat from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import DeviceType
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FilterFormat from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import MaceKeyword from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import ConverterUtil from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import TransformerRule from mace.python.tools.converter_tool.base_converter import TransformerRule
from mace.python.tools.convert_util import mace_check from mace.python.tools.convert_util import mace_check
...@@ -117,7 +118,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -117,7 +118,7 @@ class Transformer(base_converter.ConverterInterface):
self._producer = {} self._producer = {}
self._target_data_format = DataFormat.NHWC self._target_data_format = DataFormat.NHWC
if self._option.device == mace_pb2.CPU: if self._option.device == DeviceType.CPU.value:
self._target_data_format = DataFormat.NCHW self._target_data_format = DataFormat.NCHW
def run(self): def run(self):
...@@ -491,7 +492,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -491,7 +492,7 @@ class Transformer(base_converter.ConverterInterface):
net = self._model net = self._model
filter_format = self.filter_format() filter_format = self.filter_format()
if self._option.device == mace_pb2.GPU: if self._option.device == DeviceType.GPU.value:
for op in net.op: for op in net.op:
if op.type == MaceOp.Conv2D.name \ if op.type == MaceOp.Conv2D.name \
and self.check_if_gpu_use_winograd_conv(op): and self.check_if_gpu_use_winograd_conv(op):
...@@ -619,7 +620,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -619,7 +620,7 @@ class Transformer(base_converter.ConverterInterface):
return False return False
def flatten_atrous_conv(self): def flatten_atrous_conv(self):
if self._option.device != mace_pb2.GPU: if self._option.device != DeviceType.GPU.value:
return return
net = self._model net = self._model
...@@ -871,7 +872,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -871,7 +872,7 @@ class Transformer(base_converter.ConverterInterface):
op.input[input_idx] = output_name op.input[input_idx] = output_name
def transform_buffer_image(self): def transform_buffer_image(self):
if self._option.device != mace_pb2.GPU: if self._option.device != DeviceType.GPU.value:
return False return False
print("Transform buffer to image") print("Transform buffer to image")
...@@ -997,7 +998,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -997,7 +998,7 @@ class Transformer(base_converter.ConverterInterface):
def transform_global_conv_to_fc(self): def transform_global_conv_to_fc(self):
"""Transform global conv to fc should be placed after transposing """Transform global conv to fc should be placed after transposing
input/output and filter""" input/output and filter"""
if self._option.device == mace_pb2.GPU: if self._option.device == DeviceType.GPU.value:
return False return False
net = self._model net = self._model
......
...@@ -19,18 +19,21 @@ ...@@ -19,18 +19,21 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/core/macros.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
namespace mace { namespace mace {
{% if model_type == 'source' %}
{% for tag in model_tags %} {% for tag in model_tags %}
namespace {{tag}} { namespace {{tag}} {
extern const unsigned char *LoadModelData(const char *model_data_file); extern const unsigned char *LoadModelData(const std::string &model_data_file);
extern void UnloadModelData(const unsigned char *model_data); extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data); extern const std::shared_ptr<NetDef> CreateNet();
extern const std::string ModelName(); extern const std::string ModelName();
extern const std::string ModelChecksum(); extern const std::string ModelChecksum();
...@@ -48,9 +51,9 @@ std::map<std::string, int> model_name_map { ...@@ -48,9 +51,9 @@ std::map<std::string, int> model_name_map {
}; };
} // namespace } // namespace
MaceStatus CreateMaceEngine( MaceStatus CreateMaceEngineFromCode(
const char *model_name, const std::string &model_name,
const char *model_data_file, const std::string &model_data_file,
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes, const std::vector<std::string> &output_nodes,
const DeviceType device_type, const DeviceType device_type,
...@@ -60,16 +63,16 @@ MaceStatus CreateMaceEngine( ...@@ -60,16 +63,16 @@ MaceStatus CreateMaceEngine(
return MaceStatus::MACE_INVALID_ARGS; return MaceStatus::MACE_INVALID_ARGS;
} }
const unsigned char * model_data = nullptr; const unsigned char * model_data = nullptr;
NetDef net_def; std::shared_ptr<NetDef> net_def;
MaceStatus status = MaceStatus::MACE_SUCCESS; MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}:
model_data = model_data =
mace::{{model_tags[i]}}::LoadModelData(model_data_file); mace::{{model_tags[i]}}::LoadModelData(model_data_file);
net_def = mace::{{model_tags[i]}}::CreateNet(model_data); net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(device_type)); engine->reset(new mace::MaceEngine(device_type));
status = (*engine)->Init(&net_def, input_nodes, output_nodes); status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, model_data);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::{{model_tags[i]}}::UnloadModelData(model_data); mace::{{model_tags[i]}}::UnloadModelData(model_data);
} }
...@@ -81,5 +84,22 @@ MaceStatus CreateMaceEngine( ...@@ -81,5 +84,22 @@ MaceStatus CreateMaceEngine(
return status; return status;
} }
{% else %}
MaceStatus CreateMaceEngineFromCode(
const std::string &model_name,
const std::string &model_data_file,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes,
const DeviceType device_type,
std::shared_ptr<MaceEngine> *engine) {
MACE_UNUSED(model_name);
MACE_UNUSED(model_data_file);
MACE_UNUSED(input_nodes);
MACE_UNUSED(output_nodes);
MACE_UNUSED(device_type);
MACE_UNUSED(engine);
return MaceStatus::MACE_INVALID_ARGS;
}
{% endif %}
} // namespace mace } // namespace mace
...@@ -20,7 +20,7 @@ from jinja2 import Environment, FileSystemLoader ...@@ -20,7 +20,7 @@ from jinja2 import Environment, FileSystemLoader
FLAGS = None FLAGS = None
def gen_mace_engine_factory(model_tags, template_dir, output_dir): def gen_mace_engine_factory(model_tags, template_dir, model_type, output_dir):
# Create the jinja2 environment. # Create the jinja2 environment.
j2_env = Environment( j2_env = Environment(
loader=FileSystemLoader(template_dir), trim_blocks=True) loader=FileSystemLoader(template_dir), trim_blocks=True)
...@@ -29,6 +29,7 @@ def gen_mace_engine_factory(model_tags, template_dir, output_dir): ...@@ -29,6 +29,7 @@ def gen_mace_engine_factory(model_tags, template_dir, output_dir):
template_name = 'mace_engine_factory.h.jinja2' template_name = 'mace_engine_factory.h.jinja2'
source = j2_env.get_template(template_name).render( source = j2_env.get_template(template_name).render(
model_tags=model_tags, model_tags=model_tags,
model_type=model_type,
) )
with open(output_dir + '/mace_engine_factory.h', "wb") as f: with open(output_dir + '/mace_engine_factory.h', "wb") as f:
f.write(source) f.write(source)
...@@ -45,11 +46,16 @@ def parse_args(): ...@@ -45,11 +46,16 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--template_dir", type=str, default="", help="template path") "--template_dir", type=str, default="", help="template path")
parser.add_argument( parser.add_argument(
"--output_dir", type=str, default="", help="template path") "--output_dir", type=str, default="", help="output path")
parser.add_argument(
"--model_type",
type=str,
default="",
help="[source|pb] model load type")
return parser.parse_known_args() return parser.parse_known_args()
if __name__ == '__main__': if __name__ == '__main__':
FLAGS, unparsed = parse_args() FLAGS, unparsed = parse_args()
gen_mace_engine_creator(FLAGS.model_tag, FLAGS.template_dir, gen_mace_engine_creator(FLAGS.model_tag, FLAGS.template_dir,
FLAGS.output_dir) FLAGS.model_type, FLAGS.output_dir)
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include "mace/core/macros.h"
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -24,9 +26,8 @@ ...@@ -24,9 +26,8 @@
namespace mace { namespace mace {
namespace {{tag}} { namespace {{tag}} {
{% for tensor in tensors %} {% for i in range(net.tensors|length) %}
extern void CreateTensor{{ tensor.id }}(std::vector<mace::ConstTensor> *tensors, extern void CreateTensor{{ i }}(mace::ConstTensor *tensor);
const unsigned char *model_data);
{% endfor %} {% endfor %}
...@@ -39,116 +40,119 @@ extern void CreateOperator{{i}}(mace::OperatorDef *op); ...@@ -39,116 +40,119 @@ extern void CreateOperator{{i}}(mace::OperatorDef *op);
namespace { namespace {
{% if net.arg|length != 0 %} {% if net.arg|length != 0 %}
void CreateNetArg(mace::NetDef &net_def) { void CreateNetArg(NetDef *net_def) {
net_def.mutable_arg().reserve({{ net.arg|length }}); net_def->mutable_arg()->Reserve({{ net.arg|length }});
mace::Argument *arg = nullptr; mace::Argument *arg = nullptr;
{% for arg in net.arg %} {% for i in range(net.arg|length) %}
arg = net_def.add_arg(); arg = net_def->add_arg();
arg->set_name({{ arg.name|tojson }}); arg->set_name({{ net.arg[i].name|tojson }});
{%- if arg.HasField('f') %} {%- if net.arg[i].HasField('f') %}
arg->set_f({{ arg.f }}); arg->set_f({{ net.arg[i].f }});
{% endif %} {% endif %}
{%- if arg.HasField('i') %} {%- if net.arg[i].HasField('i') %}
arg->set_i({{ arg.i }}); arg->set_i({{ net.arg[i].i }});
{% endif %} {% endif %}
{%- if arg.HasField('s') %} {%- if net.arg[i].HasField('s') %}
arg->set_s({{ arg.s|tojson }}); arg->set_s({{ net.arg[i].s|tojson }});
{% endif %} {% endif %}
{% if arg.floats|length != 0 %} arg->mutable_floats()->Reserve({{ net.arg[i].floats|length }});
arg->set_floats({ {{ arg.floats|join(', ') }} }); {% for float_value in net.arg[i].floats %}
{% endif %} arg->add_floats({{ float_value }});
{% if arg.ints|length != 0 %} {% endfor %}
arg->set_ints({ {{ arg.ints|join(', ') }} }); arg->mutable_ints()->Reserve({{ net.arg[i].ints|length }});
{% endif %} {% for int_value in net.arg[i].ints %}
{% if arg.strings|length != 0 %} arg->add_ints({{ int_value }});
arg->set_strings({ {{ arg.strings|stringfy() }} }); {% endfor %}
{% endif %} arg->mutable_strings()->Reserve({{ net.arg[i].strings|length }});
{% for str_value in net.arg[i].strings %}
arg->add_strings({{ str_value }});
{% endfor %}
{% endfor %} {% endfor %}
} }
{% endif %} {% endif %}
{% if net.output_info | length > 0 %} {% if net.output_info | length > 0 %}
void CreateOutputInfo(mace::NetDef &net_def) { void CreateOutputInfo(NetDef *net_def) {
std::vector<std::vector<int>> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} }; std::vector<std::vector<int>> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} };
std::vector<int> data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} }; std::vector<int> data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} };
std::vector<mace::DataType> data_types({{ net.output_info | length }}); std::vector<mace::DataType> data_types({{ net.output_info | length }});
for (int k = 0; k < {{ net.output_info | length }}; ++k) { for (int k = 0; k < {{ net.output_info | length }}; ++k) {
data_types[k] = static_cast<mace::DataType>(data_types_int[k]); data_types[k] = static_cast<mace::DataType>(data_types_int[k]);
} }
net_def.mutable_output_info().resize({{ net.output_info | length }}); net_def->mutable_output_info()->Reserve({{ net.output_info | length }});
for (int i = 0; i < {{ net.output_info | length }}; ++i) { for (int i = 0; i < {{ net.output_info | length }}; ++i) {
net_def.mutable_output_info()[i].set_data_type(data_types[i]); auto output_info = net_def->add_output_info();
net_def.mutable_output_info()[i].set_dims(dims[i]); output_info->set_data_type(data_types[i]);
output_info->mutable_dims()->Reserve(dims[i].size());
for (size_t j = 0; j < dims[i].size(); ++j) {
output_info->add_dims(dims[i][j]);
}
} }
} }
{% endif %} {% endif %}
void CreateOperators(std::vector<mace::OperatorDef> *ops) { void CreateOperators(NetDef *net_def) {
MACE_LATENCY_LOGGER(1, "Create operators"); MACE_LATENCY_LOGGER(1, "Create operators");
ops->resize({{ net.op|length }}); net_def->mutable_op()->Reserve({{ net.op|length }});
{% for i in range(net.op|length) %} {% for i in range(net.op|length) %}
mace::{{tag}}::CreateOperator{{i}}(&ops->at({{i}})); mace::{{tag}}::CreateOperator{{i}}(net_def->add_op());
{% endfor %} {% endfor %}
} }
void CreateTensors(std::vector<mace::ConstTensor> *tensors, void CreateTensors(NetDef *net_def) {
const unsigned char *model_data) {
MACE_LATENCY_LOGGER(1, "Create tensors"); MACE_LATENCY_LOGGER(1, "Create tensors");
tensors->reserve({{ net.tensors|length }});
{% for tensor in tensors %} net_def->mutable_tensors()->Reserve({{ net.tensors|length }});
mace::{{tag}}::CreateTensor{{tensor.id}}(tensors, model_data); {% for i in range(net.tensors|length) %}
mace::{{tag}}::CreateTensor{{ i }}(net_def->add_tensors());
{% endfor %} {% endfor %}
} }
{% if net.mem_arena.mem_block|length != 0 %} {% if net.mem_arena.mem_block|length != 0 %}
void CreateMemoryArena(mace::MemoryArena *mem_arena) { void CreateMemoryArena(mace::MemoryArena *mem_arena) {
std::vector<mace::MemoryBlock> &mem_block = mem_arena->mutable_mem_block(); mem_arena->mutable_mem_block()->Reserve({{ net.mem_arena.mem_block|length }});
mem_block.reserve({{ net.mem_arena.mem_block|length }}); {% for i in range(net.mem_arena.mem_block|length) %}
{% for mem_blk in net.mem_arena.mem_block %} mace::MemoryBlock* mem_block{{i}} = mem_arena->add_mem_block();
mem_block.emplace_back(mace::MemoryBlock({{ mem_blk.mem_id }}, mem_block{{i}}->set_mem_id({{net.mem_arena.mem_block[i].mem_id}});
{{mem_blk.x}}, mem_block{{i}}->set_x({{net.mem_arena.mem_block[i].x}});
{{mem_blk.y}})); mem_block{{i}}->set_y({{net.mem_arena.mem_block[i].y}});
{% endfor %}
{% endfor %}
} }
{% endif %} {% endif %}
} // namespace } // namespace
namespace {{tag}} { namespace {{tag}} {
NetDef CreateNet(const unsigned char *model_data) { const std::shared_ptr<NetDef> CreateNet() {
MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}"); MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}");
NetDef net_def;
net_def.set_name("{{ net.name}}");
net_def.set_version("{{ net.version }}");
{% if net.arg|length != 0 %}
CreateNetArg(net_def);
{% endif %}
CreateOperators(&net_def.mutable_op()); std::shared_ptr<NetDef> net_def(new NetDef());
net_def->set_name("{{ net.name}}");
net_def->set_version("{{ net.version }}");
CreateTensors(&net_def.mutable_tensors(), model_data); CreateOperators(net_def.get());
CreateTensors(net_def.get());
{% if net.arg|length != 0 %}
CreateNetArg(net_def.get());
{% endif %}
{% if net.mem_arena.mem_block|length != 0 %} {% if net.mem_arena.mem_block|length != 0 %}
CreateMemoryArena(&net_def.mutable_mem_arena()); CreateMemoryArena(net_def->mutable_mem_arena());
{% endif %} {% endif %}
{% if net.output_info | length > 0 %} {% if net.output_info | length > 0 %}
CreateOutputInfo(net_def); CreateOutputInfo(net_def.get());
{% endif %} {% endif %}
return net_def; return net_def;
......
...@@ -24,11 +24,12 @@ ...@@ -24,11 +24,12 @@
namespace mace { namespace mace {
namespace {{tag}} { namespace {{tag}} {
const unsigned char *LoadModelData(const char *model_data_file);
const unsigned char *LoadModelData(const std::string &model_data_file);
void UnloadModelData(const unsigned char *model_data); void UnloadModelData(const unsigned char *model_data);
NetDef CreateNet(const unsigned char *model_data); const std::shared_ptr<NetDef> CreateNet();
const std::string ModelName(); const std::string ModelName();
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -34,11 +35,24 @@ void UpdateOp(mace::OperatorDef *op, ...@@ -34,11 +35,24 @@ void UpdateOp(mace::OperatorDef *op,
const std::vector<int> &mem_ids) { const std::vector<int> &mem_ids) {
op->set_name(name); op->set_name(name);
op->set_type(type); op->set_type(type);
op->set_input(inputs);
op->set_output(outputs);
op->set_output_type(output_types);
op->set_node_id(node_id); op->set_node_id(node_id);
op->set_mem_id(mem_ids);
op->mutable_input()->Reserve(inputs.size());
for (auto input : inputs) {
op->add_input(input);
}
op->mutable_output()->Reserve(outputs.size());
for (auto output : outputs) {
op->add_output(output);
}
op->mutable_output_type()->Reserve(output_types.size());
for (auto output_type : output_types) {
op->add_output_type(output_type);
}
op->mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
op->add_mem_id(mem_id);
}
} }
} // namespace } // namespace
...@@ -53,6 +67,7 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { ...@@ -53,6 +67,7 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
MACE_LATENCY_LOGGER(2, "Create operator {{ net.op[i].name }}"); MACE_LATENCY_LOGGER(2, "Create operator {{ net.op[i].name }}");
mace::Argument *arg = nullptr; mace::Argument *arg = nullptr;
op->mutable_arg()->Reserve({{ net.op[i].arg|length }});
{% for arg in net.op[i].arg %} {% for arg in net.op[i].arg %}
arg = op->add_arg(); arg = op->add_arg();
...@@ -68,20 +83,32 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { ...@@ -68,20 +83,32 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
arg->set_s({{ arg.s|tojson }}); arg->set_s({{ arg.s|tojson }});
{%- endif %} {%- endif %}
{% if arg.floats|length != 0 %} arg->mutable_floats()->Reserve({{ arg.floats|length }});
arg->set_floats({ {{ arg.floats|join(', ') }} }); {% for float_value in arg.floats %}
{% endif %} arg->add_floats({{ float_value }});
{% if arg.ints|length != 0 %} {% endfor %}
arg->set_ints({ {{ arg.ints|join(', ') }} }); arg->mutable_ints()->Reserve({{ arg.ints|length }});
{% endif %} {% for int_value in arg.ints %}
{% if arg.strings|length != 0 %} arg->add_ints({{ int_value }});
arg->set_strings({ {{ arg.strings|stringfy() }} }); {% endfor %}
{% endif %} arg->mutable_strings()->Reserve({{ arg.strings|length }});
{% for str_value in arg.strings %}
arg->add_strings({{ str_value }});
{% endfor %}
{% endfor %} {% endfor %}
op->mutable_output_shape()->Reserve({{ net.op[i].output_shape|length }});
mace::OutputShape * output_shape = nullptr;
{% for shape in net.op[i].output_shape %} {% for shape in net.op[i].output_shape %}
{% if shape.dims | length > 0 %} {% if shape.dims|length > 0 %}
op->add_output_shape(mace::OutputShape({ {{ shape.dims|join(', ') }} })); output_shape = op->add_output_shape();
output_shape->mutable_dims()->Reserve({{ shape.dims|length }});
{% for dim in shape.dims %}
output_shape->add_dims({{ dim }});
{% endfor %}
{% endif %} {% endif %}
{% endfor %} {% endfor %}
...@@ -103,14 +130,19 @@ void CreateOperator{{i}}(mace::OperatorDef *op) { ...@@ -103,14 +130,19 @@ void CreateOperator{{i}}(mace::OperatorDef *op) {
std::vector<int> input_node_ids({ {{ net.op[i].node_input | map(attribute='node_id') | join(', ') }} }); std::vector<int> input_node_ids({ {{ net.op[i].node_input | map(attribute='node_id') | join(', ') }} });
std::vector<int> input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} }); std::vector<int> input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} });
for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) { mace::NodeInput *node_input = nullptr;
mace::NodeInput input(input_node_ids[i], input_output_ports[i]); op->mutable_node_input()->Reserve({{ net.op[i].node_input|length }});
op->add_node_input(input); for (size_t i = 0; i < {{ net.op[i].node_input|length }}; ++i) {
node_input = op->add_node_input();
node_input->set_node_id(input_node_ids[i]);
node_input->set_output_port(input_output_ports[i]);
} }
{% endif %} {% endif %}
{% if net.op[i].out_max_byte_size | length > 0 %} {% if net.op[i].out_max_byte_size | length > 0 %}
std::vector<int> out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }}; std::vector<int> out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }};
for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) { op->mutable_out_max_byte_size()->Reserve({{ net.op[i].out_max_byte_size|length }});
for (size_t i = 0; i < {{ net.op[i].out_max_byte_size|length }}; ++i) {
op->add_out_max_byte_size(out_max_byte_sizes[i]); op->add_out_max_byte_size(out_max_byte_sizes[i]);
} }
{% endif %} {% endif %}
......
...@@ -14,122 +14,10 @@ ...@@ -14,122 +14,10 @@
import datetime import datetime
import os import os
import uuid
import numpy as np
import hashlib
from mace.proto import mace_pb2 from mace.proto import mace_pb2
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
GENERATED_NAME = set()
def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5()
md5.update(namespace)
md5.update(name)
md5_digest = md5.hexdigest()
name = md5_digest[:8]
while name in GENERATED_NAME:
name = md5_digest
assert name not in GENERATED_NAME
GENERATED_NAME.add(name)
return name
def generate_tensor_map(tensors):
tensor_map = {}
for t in tensors:
if t.name not in tensor_map:
tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
return tensor_map
def generate_in_out_map(ops, tensor_map):
in_out_map = {}
for op in ops:
op.name = generate_obfuscated_name("op", op.name)
for input_name in op.input:
if input_name not in in_out_map:
if input_name in tensor_map:
in_out_map[input_name] = tensor_map[input_name]
else:
in_out_map[input_name] = generate_obfuscated_name(
"in", input_name)
for output_name in op.output:
if output_name not in in_out_map:
if output_name in tensor_map:
in_out_map[output_name] = tensor_map[output_name]
else:
in_out_map[output_name] = generate_obfuscated_name(
"out", output_name)
return in_out_map
def obfuscate_name(net_def):
input_node = "mace_input_node"
output_node = "mace_output_node"
tensor_map = generate_tensor_map(net_def.tensors)
in_out_map = generate_in_out_map(net_def.op, tensor_map)
for t in net_def.tensors:
if input_node not in t.name and output_node not in t.name:
t.name = tensor_map[t.name]
for op in net_def.op:
for i in range(len(op.input)):
if input_node not in op.input[i]:
op.input[i] = in_out_map[op.input[i]]
for i in range(len(op.output)):
if output_node not in op.output[i]:
op.output[i] = in_out_map[op.output[i]]
def normalize_op_name(op_name):
idx = op_name.rfind(':')
if idx == -1:
return op_name
else:
return op_name[:idx]
def rename_tensor(net_def):
tensor_map = {}
for t in net_def.tensors:
if t.name not in tensor_map:
tensor_map[t.name] = "_" + normalize_op_name(t.name).replace("/",
"_")
t.name = tensor_map[t.name]
for op in net_def.op:
for i in range(len(op.input)):
if op.input[i] in tensor_map:
op.input[i] = tensor_map[op.input[i]]
for i in range(len(op.output)):
if op.output[i] in tensor_map:
op.output[i] = tensor_map[op.output[i]]
class TensorInfo:
def __init__(self, id, t, runtime, gpu_data_type):
self.id = id
self.data_type = mace_pb2.DataType.Name(t.data_type)
if t.data_type == mace_pb2.DT_FLOAT:
if runtime == 'gpu' and gpu_data_type == 'half':
self.data_type = mace_pb2.DT_HALF
self.data = bytearray(
np.array(t.float_data).astype(np.float16).tobytes())
else:
self.data_type = mace_pb2.DT_FLOAT
self.data = bytearray(
np.array(t.float_data).astype(np.float32).tobytes())
elif t.data_type == mace_pb2.DT_INT32:
self.data = bytearray(
np.array(t.int32_data).astype(np.int32).tobytes())
elif t.data_type == mace_pb2.DT_UINT8:
self.data = bytearray(
np.array(t.int32_data).astype(np.uint8).tolist())
else:
raise Exception('Tensor data type %s not supported' % t.data_type)
def stringfy(value): def stringfy(value):
return ', '.join('"{0}"'.format(w) for w in value) return ', '.join('"{0}"'.format(w) for w in value)
...@@ -137,12 +25,8 @@ def stringfy(value): ...@@ -137,12 +25,8 @@ def stringfy(value):
def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
obfuscate, model_tag, output, runtime, embed_model_data, obfuscate, model_tag, output, runtime, embed_model_data,
winograd_conv, gpu_data_type): winograd_conv, model_load_type, tensor_infos,
if obfuscate: model_data):
obfuscate_name(net_def)
else:
rename_tensor(net_def)
# Capture our current directory # Capture our current directory
print template_dir print template_dir
...@@ -153,82 +37,63 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir, ...@@ -153,82 +37,63 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
output_dir = os.path.dirname(output) + '/' output_dir = os.path.dirname(output) + '/'
# generate tensor source files # generate tensor source files
template_name = 'tensor_source.jinja2' template_name = 'tensor_source.jinja2'
model_data = [] for i in range(len(net_def.tensors)):
offset = 0 if model_load_type == 'source':
counter = 0 source = j2_env.get_template(template_name).render(
for t in net_def.tensors: tensor_info=tensor_infos[i],
tensor_info = TensorInfo(counter, t, runtime, gpu_data_type) tensor=net_def.tensors[i],
# align tag=model_tag,
if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: )
padding = 4 - offset % 4 with open(output_dir + 'tensor' + str(i) + '.cc', "wb") as f:
model_data.extend(bytearray([0] * padding)) f.write(source)
offset += padding
if model_load_type == 'source':
# generate tensor data
template_name = 'tensor_data.jinja2'
source = j2_env.get_template(template_name).render( source = j2_env.get_template(template_name).render(
tensor_info=tensor_info,
tensor=t,
tag=model_tag, tag=model_tag,
offset=offset, embed_model_data=embed_model_data,
) model_data_size=len(model_data),
model_data.extend(tensor_info.data) model_data=model_data)
offset += len(tensor_info.data) with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
with open(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f:
f.write(source) f.write(source)
counter += 1
# generate tensor data
template_name = 'tensor_data.jinja2'
source = j2_env.get_template(template_name).render(
tag=model_tag,
embed_model_data=embed_model_data,
model_data_size=offset,
model_data=model_data)
with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
f.write(source)
if not embed_model_data:
with open(output_dir + model_tag + '.data', "wb") as f:
f.write(bytearray(model_data))
# generate op source files # generate op source files
template_name = 'operator.jinja2' template_name = 'operator.jinja2'
counter = 0 counter = 0
op_size = len(net_def.op) op_size = len(net_def.op)
for start in range(0, op_size, 10): for start in range(0, op_size, 10):
source = j2_env.get_template(template_name).render(
start=start,
end=min(start + 10, op_size),
net=net_def,
tag=model_tag,
runtime=runtime,
)
with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
f.write(source)
counter += 1
# generate model source files
build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
template_name = 'model.jinja2'
checksum = model_checksum
if weight_checksum is not None:
checksum = "{},{}".format(model_checksum, weight_checksum)
source = j2_env.get_template(template_name).render( source = j2_env.get_template(template_name).render(
start=start,
end=min(start + 10, op_size),
net=net_def, net=net_def,
tag=model_tag, tag=model_tag,
runtime=runtime, runtime=runtime,
) obfuscate=obfuscate,
with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f: embed_model_data=embed_model_data,
winograd_conv=winograd_conv,
checksum=checksum,
build_time=build_time)
with open(output, "wb") as f:
f.write(source) f.write(source)
counter += 1
# generate model source files # generate model header file
build_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') template_name = 'model_header.jinja2'
template_name = 'model.jinja2' source = j2_env.get_template(template_name).render(tag=model_tag, )
tensors = [ with open(output_dir + model_tag + '.h', "wb") as f:
TensorInfo(i, net_def.tensors[i], runtime, gpu_data_type) f.write(source)
for i in range(len(net_def.tensors))
]
checksum = model_checksum
if weight_checksum is not None:
checksum = "{},{}".format(model_checksum, weight_checksum)
source = j2_env.get_template(template_name).render(
tensors=tensors,
net=net_def,
tag=model_tag,
runtime=runtime,
obfuscate=obfuscate,
embed_model_data=embed_model_data,
winograd_conv=winograd_conv,
checksum=checksum,
build_time=build_time)
with open(output, "wb") as f:
f.write(source)
# generate model header file
template_name = 'model_header.jinja2'
source = j2_env.get_template(template_name).render(tag=model_tag, )
with open(output_dir + model_tag + '.h', "wb") as f:
f.write(source)
...@@ -22,17 +22,13 @@ ...@@ -22,17 +22,13 @@
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
{% if not embed_model_data %} namespace mace {
#include <errno.h> extern const unsigned char *LoadModelData(const std::string &model_data_file,
#include <fcntl.h> const size_t &data_size);
#include <string.h> extern void UnloadModelData(const unsigned char *model_data,
#include <sys/mman.h> const size_t &data_size);
#include <unistd.h>
{% endif %}
namespace mace {
namespace {{tag}} { namespace {{tag}} {
{% if embed_model_data %} {% if embed_model_data %}
...@@ -41,34 +37,18 @@ alignas(4) const unsigned char model_data[{{ model_data_size }}] = { ...@@ -41,34 +37,18 @@ alignas(4) const unsigned char model_data[{{ model_data_size }}] = {
}; };
{% endif %} {% endif %}
const unsigned char *LoadModelData(const char *model_data_file) { const unsigned char *LoadModelData(const std::string &model_data_file) {
{% if embed_model_data %} {% if embed_model_data %}
MACE_UNUSED(model_data_file); MACE_UNUSED(model_data_file);
return model_data; return model_data;
{% else %} {% else %}
int fd = open(model_data_file, O_RDONLY); return mace::LoadModelData(model_data_file, {{ model_data_size }});
MACE_CHECK(fd >= 0, "Failed to open model data file ",
model_data_file, ", error code: ", errno);
const unsigned char *model_data =
static_cast<const unsigned char *>(mmap(nullptr, {{ model_data_size }},
PROT_READ, MAP_PRIVATE, fd, 0));
MACE_CHECK(model_data != MAP_FAILED, "Failed to map model data file ",
model_data_file, ", error code: ", errno);
int ret = close(fd);
MACE_CHECK(ret == 0, "Failed to close model data file ",
model_data_file, ", error code: ", errno);
return model_data;
{% endif %} {% endif %}
} }
void UnloadModelData(const unsigned char *model_data) { void UnloadModelData(const unsigned char *model_data) {
{% if not embed_model_data %} {% if not embed_model_data %}
int ret = munmap(const_cast<unsigned char *>(model_data), mace::UnloadModelData(model_data, {{ model_data_size }});
{{ model_data_size }});
MACE_CHECK(ret == 0, "Failed to unmap model data file, error code: ", errno);
{% else %} {% else %}
MACE_UNUSED(model_data); MACE_UNUSED(model_data);
{% endif %} {% endif %}
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include "mace/proto/mace.pb.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -24,12 +25,16 @@ ...@@ -24,12 +25,16 @@
namespace mace { namespace mace {
namespace {{tag}} { namespace {{tag}} {
void CreateTensor{{tensor_info.id}}(std::vector<mace::ConstTensor> *tensors, void CreateTensor{{tensor_info.id}}(mace::ConstTensor *const_tensor) {
const unsigned char *model_data) {
MACE_LATENCY_LOGGER(2, "Create tensor {{ tensor.name }}"); MACE_LATENCY_LOGGER(2, "Create tensor {{ tensor.name }}");
tensors->emplace_back(mace::ConstTensor( const_tensor->set_name({{ tensor.name|tojson }});
{{ tensor.name|tojson }}, model_data + {{ offset }}, const_tensor->set_offset({{ tensor.offset }});
{ {{ tensor.dims|join(', ') }} }, {{ tensor_info.data_type }}, {{ tensor.node_id }})); const_tensor->set_data_size({{ tensor.data_size }});
{% for dim in tensor.dims %}
const_tensor->add_dims({{ dim }});
{% endfor %}
const_tensor->set_data_type(static_cast<DataType>({{ tensor_info.data_type }}));
const_tensor->set_node_id({{ tensor.node_id }});
} }
} // namespace {{tag}} } // namespace {{tag}}
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import numpy as np
from mace.proto import mace_pb2
# Global registry of every obfuscated name handed out so far; used to
# guarantee uniqueness across tensors, ops, inputs and outputs.
GENERATED_NAME = set()


def generate_obfuscated_name(namespace, name):
    """Return a short, unique, deterministic obfuscated alias for `name`.

    The alias is the first 8 hex chars of md5(namespace + name); on a
    collision the full 32-char digest is used instead.

    Args:
        namespace: grouping prefix mixed into the hash (e.g. "tensor", "op").
        name: the original (sensitive) name to obfuscate.

    Returns:
        A hex string that has not been returned before.

    Raises:
        AssertionError: if both the short and the full digest are already
            taken (i.e. the same (namespace, name) pair was requested a
            third time).
    """
    md5 = hashlib.md5()
    # encode() so this works on Python 3, where md5.update() requires
    # bytes; on Python 2 ascii str round-trips to the same bytes.
    md5.update(namespace.encode('utf-8'))
    md5.update(name.encode('utf-8'))
    md5_digest = md5.hexdigest()

    name = md5_digest[:8]
    if name in GENERATED_NAME:
        # Short prefix collided (same pair requested twice): fall back to
        # the full digest.  The original `while` loop here could spin
        # forever on a third request; a single bounded fallback plus the
        # assert below preserves all reachable behavior.
        name = md5_digest
    assert name not in GENERATED_NAME
    GENERATED_NAME.add(name)
    return name
def generate_tensor_map(tensors):
    """Map each distinct tensor name to a fresh obfuscated alias.

    Args:
        tensors: iterable of tensor protos, each with a `.name` field.

    Returns:
        dict of original name -> obfuscated name (one entry per unique name).
    """
    mapping = {}
    for tensor in tensors:
        # Only obfuscate on first sight so duplicates share one alias and
        # GENERATED_NAME is not polluted with extra entries.
        if tensor.name not in mapping:
            mapping[tensor.name] = generate_obfuscated_name("tensor",
                                                            tensor.name)
    return mapping
def generate_in_out_map(ops, tensor_map):
    """Obfuscate op names in place and build an edge-name alias map.

    Each op's `.name` is rewritten to an obfuscated alias (side effect).
    Every input/output edge name gets an alias: tensors reuse their entry
    from `tensor_map`, other edges get a fresh "in"/"out" alias.

    Args:
        ops: iterable of op protos (mutated: `.name` is replaced).
        tensor_map: original tensor name -> obfuscated name.

    Returns:
        dict of original edge name -> obfuscated name.
    """
    in_out_map = {}
    for op in ops:
        op.name = generate_obfuscated_name("op", op.name)
        # Inputs first, then outputs, matching the order aliases were
        # historically generated in.
        for edge_names, prefix in ((op.input, "in"), (op.output, "out")):
            for edge_name in edge_names:
                if edge_name in in_out_map:
                    continue
                if edge_name in tensor_map:
                    in_out_map[edge_name] = tensor_map[edge_name]
                else:
                    in_out_map[edge_name] = generate_obfuscated_name(
                        prefix, edge_name)
    return in_out_map
def obfuscate_name(net_def):
    """Obfuscate all tensor, op and edge names in `net_def` in place.

    Names containing the reserved input/output node markers are left
    untouched so the runtime can still locate model entry/exit points.

    Args:
        net_def: NetDef proto to mutate.
    """
    input_node = "mace_input_node"
    output_node = "mace_output_node"
    tensor_map = generate_tensor_map(net_def.tensors)
    in_out_map = generate_in_out_map(net_def.op, tensor_map)

    for tensor in net_def.tensors:
        # Keep boundary tensors recognizable; rename everything else.
        if input_node not in tensor.name and output_node not in tensor.name:
            tensor.name = tensor_map[tensor.name]

    for op in net_def.op:
        for idx, edge_name in enumerate(op.input):
            if input_node not in edge_name:
                op.input[idx] = in_out_map[edge_name]
        for idx, edge_name in enumerate(op.output):
            if output_node not in edge_name:
                op.output[idx] = in_out_map[edge_name]
def normalize_op_name(op_name):
    """Strip a trailing ':port' suffix (e.g. TF-style 'conv:0') if present.

    Args:
        op_name: operator or tensor name.

    Returns:
        The name up to (not including) the last ':', or the name unchanged
        when it contains no ':'.
    """
    sep = op_name.rfind(':')
    return op_name if sep == -1 else op_name[:sep]
def rename_tensor(net_def):
    """Rewrite tensor names into C-identifier-friendly form, in place.

    Each tensor name becomes "_" + normalized name with '/' replaced by
    '_'; op inputs/outputs referring to renamed tensors are updated to
    match.

    Args:
        net_def: NetDef proto to mutate.
    """
    renamed = {}
    for tensor in net_def.tensors:
        # First occurrence decides the new name; duplicates reuse it.
        if tensor.name not in renamed:
            renamed[tensor.name] = "_" + normalize_op_name(
                tensor.name).replace("/", "_")
        tensor.name = renamed[tensor.name]

    for op in net_def.op:
        for idx, edge_name in enumerate(op.input):
            if edge_name in renamed:
                op.input[idx] = renamed[edge_name]
        for idx, edge_name in enumerate(op.output):
            if edge_name in renamed:
                op.output[idx] = renamed[edge_name]
class TensorInfo:
    """Serialized view of one const tensor for code generation.

    Holds the tensor's serialized bytes (`self.data`) and the data type
    value rendered into the generated C++ source (`self.data_type`).

    NOTE(review): `self.data_type` is intentionally heterogeneous — for
    float tensors it ends up as an enum *int* (DT_HALF/DT_FLOAT), while
    for int32/uint8 tensors it stays the enum *name string* from
    DataType.Name().  Both forms render to valid C++ in the Jinja
    templates, and get_tensor_info_and_model_data() compares it against
    the string 'DT_UINT8' — confirm before "normalizing" this.
    """

    def __init__(self, id, t, runtime, gpu_data_type):
        # Sequential index of this tensor within the model (used as the
        # generated CreateTensor<id> suffix).
        self.id = id
        # Default: enum name string, e.g. 'DT_INT32'; overwritten with an
        # enum int below for float tensors.
        self.data_type = mace_pb2.DataType.Name(t.data_type)
        if t.data_type == mace_pb2.DT_FLOAT:
            if runtime == 'gpu' and gpu_data_type == 'half':
                # GPU half mode: store weights as fp16 to halve model size.
                self.data_type = mace_pb2.DT_HALF
                self.data = bytearray(
                    np.array(t.float_data).astype(np.float16).tobytes())
            else:
                self.data_type = mace_pb2.DT_FLOAT
                self.data = bytearray(
                    np.array(t.float_data).astype(np.float32).tobytes())
        elif t.data_type == mace_pb2.DT_INT32:
            self.data = bytearray(
                np.array(t.int32_data).astype(np.int32).tobytes())
        elif t.data_type == mace_pb2.DT_UINT8:
            # uint8 payloads are carried in the proto's int32_data field.
            self.data = bytearray(
                np.array(t.int32_data).astype(np.uint8).tolist())
        else:
            raise Exception('Tensor data type %s not supported' % t.data_type)
def get_tensor_info_and_model_data(net_def, runtime, gpu_data_type):
    """Pack all const tensors into one flat byte blob and record offsets.

    Mutates each tensor proto in `net_def`: sets `t.offset` (byte offset
    of its payload within the blob) and `t.data_size` (element count, not
    bytes).  Multi-byte-element tensors are 4-byte aligned inside the
    blob; uint8 tensors are packed unaligned.

    Args:
        net_def: NetDef proto (mutated: offset/data_size set per tensor).
        runtime: target runtime string (e.g. 'gpu'); forwarded to TensorInfo.
        gpu_data_type: 'half' selects fp16 storage for float tensors on GPU.

    Returns:
        (tensor_infos, model_data): list of TensorInfo plus the flat list
        of payload bytes (including alignment padding).
    """
    model_data = []
    offset = 0
    counter = 0
    tensor_infos = []
    for t in net_def.tensors:
        tensor_info = TensorInfo(counter, t, runtime, gpu_data_type)
        tensor_infos.append(tensor_info)
        # align
        # data_type is the string 'DT_UINT8' only for uint8 tensors (see
        # TensorInfo); every other type gets padded up to a 4-byte boundary
        # so the generated C++ can alias the blob as int32/float/half.
        if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0:
            padding = 4 - offset % 4
            model_data.extend(bytearray([0] * padding))
            offset += padding
        # data_size counts elements in the source repeated field.
        # NOTE(review): assumes t.data_type is still one of FLOAT/INT32/
        # UINT8 here (i.e. update_tensor_data_type has not yet rewritten
        # it to DT_HALF) — otherwise data_size is left unset; confirm call
        # order against the converter driver.
        if t.data_type == mace_pb2.DT_FLOAT:
            t.data_size = len(t.float_data)
        elif t.data_type == mace_pb2.DT_INT32:
            t.data_size = len(t.int32_data)
        elif t.data_type == mace_pb2.DT_UINT8:
            t.data_size = len(t.int32_data)
        t.offset = offset
        counter += 1
        model_data.extend(tensor_info.data)
        offset += len(tensor_info.data)
    return tensor_infos, model_data
def del_tensor_data(net_def, runtime, gpu_data_type):
    """Clear the in-proto payloads of all const tensors, in place.

    Called after the payloads have been externalized into the flat model
    data blob, so the serialized NetDef stays small.

    Args:
        net_def: NetDef proto to mutate.
        runtime: unused; kept for signature symmetry with sibling helpers.
        gpu_data_type: unused; kept for signature symmetry.
    """
    for tensor in net_def.tensors:
        if tensor.data_type == mace_pb2.DT_FLOAT:
            del tensor.float_data[:]
        elif tensor.data_type in (mace_pb2.DT_INT32, mace_pb2.DT_UINT8):
            # uint8 payloads also live in int32_data.
            del tensor.int32_data[:]
def update_tensor_data_type(net_def, runtime, gpu_data_type):
    """Retag float tensors as half precision for GPU half mode, in place.

    Args:
        net_def: NetDef proto to mutate.
        runtime: only 'gpu' triggers the retag.
        gpu_data_type: only 'half' triggers the retag.
    """
    # Hoist the loop-invariant mode check; nothing to do otherwise.
    if runtime != 'gpu' or gpu_data_type != 'half':
        return
    for tensor in net_def.tensors:
        if tensor.data_type == mace_pb2.DT_FLOAT:
            tensor.data_type = mace_pb2.DT_HALF
...@@ -13,7 +13,8 @@ cc_test( ...@@ -13,7 +13,8 @@ cc_test(
name = "mace_api_test", name = "mace_api_test",
testonly = 1, testonly = 1,
srcs = ["mace_api_test.cc"], srcs = ["mace_api_test.cc"],
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
...@@ -33,7 +34,8 @@ cc_test( ...@@ -33,7 +34,8 @@ cc_test(
name = "mace_api_mt_test", name = "mace_api_mt_test",
testonly = 1, testonly = 1,
srcs = ["mace_api_mt_test.cc"], srcs = ["mace_api_mt_test.cc"],
copts = if_openmp_enabled(["-fopenmp"]) + copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] +
if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon"]) + if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) + if_android_armv7(["-mfloat-abi=softfp"]) +
......
...@@ -69,8 +69,10 @@ void BufferToImage(const std::string &input_name, ...@@ -69,8 +69,10 @@ void BufferToImage(const std::string &input_name,
.AddIntArg("mode", mode) .AddIntArg("mode", mode)
.Finalize(&operator_def); .Finalize(&operator_def);
operator_def.set_mem_id(mem_ids); operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
net_def->add_op()->CopyFrom(operator_def); net_def->add_op()->CopyFrom(operator_def);
} }
...@@ -112,7 +114,10 @@ void Conv3x3(const std::string &input_name, ...@@ -112,7 +114,10 @@ void Conv3x3(const std::string &input_name,
.AddIntArg("device", static_cast<int>(device_type)) .AddIntArg("device", static_cast<int>(device_type))
.Finalize(&operator_def); .Finalize(&operator_def);
operator_def.set_mem_id(mem_ids); operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
net_def->add_op()->CopyFrom(operator_def); net_def->add_op()->CopyFrom(operator_def);
} }
...@@ -136,20 +141,25 @@ void Relu(const std::string &input_name, ...@@ -136,20 +141,25 @@ void Relu(const std::string &input_name,
template <typename T> template <typename T>
void AddTensor(const std::string &name, void AddTensor(const std::string &name,
const std::vector<int64_t> &shape, const std::vector<int64_t> &shape,
T *data, const int offset,
const int data_size,
NetDef *net_def) { NetDef *net_def) {
ConstTensor tensor(name, ConstTensor *tensor_ptr = net_def->add_tensors();
reinterpret_cast<unsigned char *>(data), tensor_ptr->set_name(name);
shape, tensor_ptr->mutable_dims()->Reserve(shape.size());
DataTypeToEnum<T>::value); for (auto dim : shape) {
tensor_ptr->add_dims(dim);
net_def->mutable_tensors().push_back(tensor); }
tensor_ptr->set_offset(offset);
tensor_ptr->set_data_size(data_size);
tensor_ptr->set_data_type(DataTypeToEnum<T>::value);
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void CheckOutputs(const NetDef &net_def, void CheckOutputs(const NetDef &net_def,
const std::map<std::string, mace::MaceTensor> &inputs, const std::map<std::string, mace::MaceTensor> &inputs,
const std::map<std::string, mace::MaceTensor> &outputs) { const std::map<std::string, mace::MaceTensor> &outputs,
const std::vector<T> &tensor_data) {
ops::test::OpsTestNet net; ops::test::OpsTestNet net;
for (auto input : inputs) { for (auto input : inputs) {
auto input_shape = input.second.shape(); auto input_shape = input.second.shape();
...@@ -166,13 +176,14 @@ void CheckOutputs(const NetDef &net_def, ...@@ -166,13 +176,14 @@ void CheckOutputs(const NetDef &net_def,
} }
auto tensors = net_def.tensors(); auto tensors = net_def.tensors();
for (auto tensor : tensors) { for (auto tensor : tensors) {
auto shape = tensor.dims(); std::vector<index_t> shape = {tensor.dims().begin(), tensor.dims().end()};
const int64_t data_size = std::accumulate(shape.begin(), const int64_t data_size = std::accumulate(shape.begin(),
shape.end(), 1, shape.end(), 1,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
std::vector<T> data(data_size); std::vector<T> data(data_size);
memcpy(data.data(), reinterpret_cast<const T *>(tensor.data()), memcpy(data.data(),
data_size * sizeof(T)); reinterpret_cast<const T *>(tensor_data.data()) + tensor.offset(),
tensor.data_size() * sizeof(T));
net.AddInputFromArray<D, T>(tensor.name(), shape, data); net.AddInputFromArray<D, T>(tensor.name(), shape, data);
} }
net.RunNet(net_def, D); net.RunNet(net_def, D);
...@@ -217,9 +228,14 @@ std::map<std::string, int> AddMemoryOptimization( ...@@ -217,9 +228,14 @@ std::map<std::string, int> AddMemoryOptimization(
input_shapes[i][1]); input_shapes[i][1]);
} }
size_t input_size = input_names.size(); size_t input_size = input_names.size();
size_t output_size = output_names.size();
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size);
for (size_t i = 0; i < input_size; ++i) { for (size_t i = 0; i < input_size; ++i) {
net_def->mutable_mem_arena().mutable_mem_block().push_back( MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
MemoryBlock(mem_id, in_mem_block_x, in_mem_block_y)); mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(in_mem_block_x);
mem_blk_ptr->set_y(in_mem_block_y);
res[input_names[i]] = mem_id; res[input_names[i]] = mem_id;
mem_id++; mem_id++;
} }
...@@ -234,10 +250,11 @@ std::map<std::string, int> AddMemoryOptimization( ...@@ -234,10 +250,11 @@ std::map<std::string, int> AddMemoryOptimization(
output_shapes[i][0] * output_shapes[i][0] *
output_shapes[i][1]); output_shapes[i][1]);
} }
size_t output_size = output_names.size();
for (size_t i = 0; i < output_size; ++i) { for (size_t i = 0; i < output_size; ++i) {
net_def->mutable_mem_arena().mutable_mem_block().push_back( MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
MemoryBlock(mem_id, out_mem_block_x, out_mem_block_y)); mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(out_mem_block_x);
mem_blk_ptr->set_y(out_mem_block_y);
res[output_names[i]] = mem_id; res[output_names[i]] = mem_id;
mem_id++; mem_id++;
} }
...@@ -261,16 +278,17 @@ void MaceRunFunc(const int in_out_size) { ...@@ -261,16 +278,17 @@ void MaceRunFunc(const int in_out_size) {
const std::vector<std::vector<int64_t>> output_shapes = {{1, 32, 32, 16}}; const std::vector<std::vector<int64_t>> output_shapes = {{1, 32, 32, 16}};
const std::vector<int64_t> filter_shape = {16, 16, 3, 3}; const std::vector<int64_t> filter_shape = {16, 16, 3, 3};
NetDef net_def; std::shared_ptr<NetDef> net_def(new NetDef());
// Add memory optimization // Add memory optimization
auto mem_map = AddMemoryOptimization(input_names, output_names, auto mem_map = AddMemoryOptimization(input_names, output_names,
input_shapes, output_shapes, input_shapes, output_shapes,
&net_def); net_def.get());
std::vector<half> data; std::vector<half> data;
ops::test::GenerateRandomRealTypeData<half>(filter_shape, &data); ops::test::GenerateRandomRealTypeData<half>(filter_shape, &data);
AddTensor<half>(filter_tensor_name, filter_shape, data.data(), &net_def); AddTensor<half>(
filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
for (size_t i = 0; i < input_names.size(); ++i) { for (size_t i = 0; i < input_names.size(); ++i) {
std::string input_name = MakeString("mace_input_node_", std::string input_name = MakeString("mace_input_node_",
...@@ -279,16 +297,16 @@ void MaceRunFunc(const int in_out_size) { ...@@ -279,16 +297,16 @@ void MaceRunFunc(const int in_out_size) {
mace::kernels::IN_OUT_CHANNEL, mace::kernels::IN_OUT_CHANNEL,
{mem_map[input_names[i]]}, {mem_map[input_names[i]]},
device, device,
&net_def); net_def.get());
} }
BufferToImage<half>(filter_tensor_name, filter_tensor_img_name, BufferToImage<half>(filter_tensor_name, filter_tensor_img_name,
mace::kernels::CONV2D_FILTER, {}, device, mace::kernels::CONV2D_FILTER, {}, device,
&net_def, NetMode::INIT); net_def.get(), NetMode::INIT);
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
Conv3x3<half>(input_names[i], filter_tensor_img_name, Conv3x3<half>(input_names[i], filter_tensor_img_name,
output_names[i], {mem_map[output_names[i]]}, output_names[i], {mem_map[output_names[i]]},
device, device,
&net_def); net_def.get());
} }
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
std::string output_name = MakeString("mace_output_node_", std::string output_name = MakeString("mace_output_node_",
...@@ -296,7 +314,7 @@ void MaceRunFunc(const int in_out_size) { ...@@ -296,7 +314,7 @@ void MaceRunFunc(const int in_out_size) {
ImageToBuffer<float>(output_names[i], output_name, ImageToBuffer<float>(output_names[i], output_name,
mace::kernels::IN_OUT_CHANNEL, mace::kernels::IN_OUT_CHANNEL,
device, device,
&net_def); net_def.get());
} }
const std::string file_path ="/data/local/tmp/mace"; const std::string file_path ="/data/local/tmp/mace";
...@@ -305,7 +323,8 @@ void MaceRunFunc(const int in_out_size) { ...@@ -305,7 +323,8 @@ void MaceRunFunc(const int in_out_size) {
mace::SetKVStorageFactory(storage_factory); mace::SetKVStorageFactory(storage_factory);
MaceEngine engine(device); MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names); MaceStatus status = engine.Init(net_def.get(), input_names, output_names,
reinterpret_cast<unsigned char *>(data.data()));
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
...@@ -322,7 +341,7 @@ void MaceRunFunc(const int in_out_size) { ...@@ -322,7 +341,7 @@ void MaceRunFunc(const int in_out_size) {
} }
} }
CheckOutputs<DeviceType::GPU, half>(net_def, inputs, outputs); CheckOutputs<DeviceType::GPU, half>(*net_def, inputs, outputs, data);
} }
} // namespace } // namespace
......
...@@ -79,7 +79,10 @@ void BufferToImage(const std::string &input_name, ...@@ -79,7 +79,10 @@ void BufferToImage(const std::string &input_name,
.AddIntArg("mode", mode) .AddIntArg("mode", mode)
.Finalize(&operator_def); .Finalize(&operator_def);
operator_def.set_mem_id(mem_ids); operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
net_def->add_op()->CopyFrom(operator_def); net_def->add_op()->CopyFrom(operator_def);
} }
...@@ -122,7 +125,10 @@ void Conv3x3(const std::string &input_name, ...@@ -122,7 +125,10 @@ void Conv3x3(const std::string &input_name,
.AddIntArg("device", static_cast<int>(device_type)) .AddIntArg("device", static_cast<int>(device_type))
.Finalize(&operator_def); .Finalize(&operator_def);
operator_def.set_mem_id(mem_ids); operator_def.mutable_mem_id()->Reserve(mem_ids.size());
for (auto mem_id : mem_ids) {
operator_def.add_mem_id(mem_id);
}
net_def->add_op()->CopyFrom(operator_def); net_def->add_op()->CopyFrom(operator_def);
} }
...@@ -146,20 +152,25 @@ void Relu(const std::string &input_name, ...@@ -146,20 +152,25 @@ void Relu(const std::string &input_name,
template <typename T> template <typename T>
void AddTensor(const std::string &name, void AddTensor(const std::string &name,
const std::vector<int64_t> &shape, const std::vector<int64_t> &shape,
T *data, const int offset,
const int data_size,
NetDef *net_def) { NetDef *net_def) {
ConstTensor tensor(name, ConstTensor *tensor_ptr = net_def->add_tensors();
reinterpret_cast<unsigned char *>(data), tensor_ptr->set_name(name);
shape, tensor_ptr->mutable_dims()->Reserve(shape.size());
DataTypeToEnum<T>::value); for (auto dim : shape) {
tensor_ptr->add_dims(dim);
net_def->mutable_tensors().push_back(tensor); }
tensor_ptr->set_offset(offset);
tensor_ptr->set_data_size(data_size);
tensor_ptr->set_data_type(DataTypeToEnum<T>::value);
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void CheckOutputs(const NetDef &net_def, void CheckOutputs(const NetDef &net_def,
const std::map<std::string, mace::MaceTensor> &inputs, const std::map<std::string, mace::MaceTensor> &inputs,
const std::map<std::string, mace::MaceTensor> &outputs) { const std::map<std::string, mace::MaceTensor> &outputs,
const std::vector<T> &tensor_data) {
ops::test::OpsTestNet net; ops::test::OpsTestNet net;
for (auto input : inputs) { for (auto input : inputs) {
auto input_shape = input.second.shape(); auto input_shape = input.second.shape();
...@@ -176,13 +187,14 @@ void CheckOutputs(const NetDef &net_def, ...@@ -176,13 +187,14 @@ void CheckOutputs(const NetDef &net_def,
} }
auto tensors = net_def.tensors(); auto tensors = net_def.tensors();
for (auto tensor : tensors) { for (auto tensor : tensors) {
auto shape = tensor.dims(); std::vector<index_t> shape = {tensor.dims().begin(), tensor.dims().end()};
const int64_t data_size = std::accumulate(shape.begin(), const int64_t data_size = std::accumulate(shape.begin(),
shape.end(), 1, shape.end(), 1,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
std::vector<T> data(data_size); std::vector<T> data(data_size);
memcpy(data.data(), reinterpret_cast<const T *>(tensor.data()), memcpy(data.data(),
data_size * sizeof(T)); reinterpret_cast<const T *>(tensor_data.data()) + tensor.offset(),
tensor.data_size() * sizeof(T));
net.AddInputFromArray<D, T>(tensor.name(), shape, data); net.AddInputFromArray<D, T>(tensor.name(), shape, data);
} }
net.RunNet(net_def, D); net.RunNet(net_def, D);
...@@ -227,9 +239,14 @@ std::map<std::string, int> AddMemoryOptimization( ...@@ -227,9 +239,14 @@ std::map<std::string, int> AddMemoryOptimization(
input_shapes[i][1]); input_shapes[i][1]);
} }
size_t input_size = input_names.size(); size_t input_size = input_names.size();
size_t output_size = output_names.size();
MemoryArena *mem_arena_ptr = net_def->mutable_mem_arena();
mem_arena_ptr->mutable_mem_block()->Reserve(input_size + output_size);
for (size_t i = 0; i < input_size; ++i) { for (size_t i = 0; i < input_size; ++i) {
net_def->mutable_mem_arena().mutable_mem_block().push_back( MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
MemoryBlock(mem_id, in_mem_block_x, in_mem_block_y)); mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(in_mem_block_x);
mem_blk_ptr->set_y(in_mem_block_y);
res[input_names[i]] = mem_id; res[input_names[i]] = mem_id;
mem_id++; mem_id++;
} }
...@@ -244,10 +261,11 @@ std::map<std::string, int> AddMemoryOptimization( ...@@ -244,10 +261,11 @@ std::map<std::string, int> AddMemoryOptimization(
output_shapes[i][0] * output_shapes[i][0] *
output_shapes[i][1]); output_shapes[i][1]);
} }
size_t output_size = output_names.size();
for (size_t i = 0; i < output_size; ++i) { for (size_t i = 0; i < output_size; ++i) {
net_def->mutable_mem_arena().mutable_mem_block().push_back( MemoryBlock *mem_blk_ptr = mem_arena_ptr->add_mem_block();
MemoryBlock(mem_id, out_mem_block_x, out_mem_block_y)); mem_blk_ptr->set_mem_id(mem_id);
mem_blk_ptr->set_x(out_mem_block_x);
mem_blk_ptr->set_y(out_mem_block_y);
res[output_names[i]] = mem_id; res[output_names[i]] = mem_id;
mem_id++; mem_id++;
} }
...@@ -271,16 +289,16 @@ void MaceRun(const int in_out_size, ...@@ -271,16 +289,16 @@ void MaceRun(const int in_out_size,
const DeviceType device = DeviceType::GPU; const DeviceType device = DeviceType::GPU;
NetDef net_def; std::shared_ptr<NetDef> net_def(new NetDef());
// Add memory optimization // Add memory optimization
auto mem_map = AddMemoryOptimization(input_names, output_names, auto mem_map = AddMemoryOptimization(input_names, output_names,
input_shapes, output_shapes, input_shapes, output_shapes,
&net_def); net_def.get());
std::vector<T> data; std::vector<T> data;
ops::test::GenerateRandomRealTypeData<T>(filter_shape, &data); ops::test::GenerateRandomRealTypeData<T>(filter_shape, &data);
AddTensor<T>(filter_tensor_name, filter_shape, data.data(), &net_def); AddTensor<T>(filter_tensor_name, filter_shape, 0, data.size(), net_def.get());
for (size_t i = 0; i < input_names.size(); ++i) { for (size_t i = 0; i < input_names.size(); ++i) {
std::string input_name = MakeString("mace_input_node_", std::string input_name = MakeString("mace_input_node_",
...@@ -289,15 +307,15 @@ void MaceRun(const int in_out_size, ...@@ -289,15 +307,15 @@ void MaceRun(const int in_out_size,
mace::kernels::IN_OUT_CHANNEL, mace::kernels::IN_OUT_CHANNEL,
{mem_map[input_names[i]]}, {mem_map[input_names[i]]},
device, device,
&net_def); net_def.get());
} }
BufferToImage<half>(filter_tensor_name, filter_tensor_img_name, BufferToImage<half>(filter_tensor_name, filter_tensor_img_name,
mace::kernels::CONV2D_FILTER, {}, device, mace::kernels::CONV2D_FILTER, {}, device,
&net_def, NetMode::INIT); net_def.get(), NetMode::INIT);
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
Conv3x3<half>(input_names[i], filter_tensor_img_name, Conv3x3<half>(input_names[i], filter_tensor_img_name,
output_names[i], {mem_map[output_names[i]]}, output_names[i], {mem_map[output_names[i]]},
device, &net_def); device, net_def.get());
} }
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
std::string output_name = MakeString("mace_output_node_", std::string output_name = MakeString("mace_output_node_",
...@@ -305,11 +323,12 @@ void MaceRun(const int in_out_size, ...@@ -305,11 +323,12 @@ void MaceRun(const int in_out_size,
ImageToBuffer<float>(output_names[i], output_name, ImageToBuffer<float>(output_names[i], output_name,
mace::kernels::IN_OUT_CHANNEL, mace::kernels::IN_OUT_CHANNEL,
device, device,
&net_def); net_def.get());
} }
MaceEngine engine(device); MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names); MaceStatus status = engine.Init(net_def.get(), input_names, output_names,
reinterpret_cast<unsigned char *>(data.data()));
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
...@@ -326,7 +345,7 @@ void MaceRun(const int in_out_size, ...@@ -326,7 +345,7 @@ void MaceRun(const int in_out_size,
} }
} }
CheckOutputs<DeviceType::GPU, T>(net_def, inputs, outputs); CheckOutputs<DeviceType::GPU, T>(*net_def, inputs, outputs, data);
} }
} // namespace } // namespace
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include <stdint.h> #include <stdint.h>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <fstream>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
...@@ -37,6 +36,7 @@ ...@@ -37,6 +36,7 @@
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/utils/utils.h"
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
...@@ -186,7 +186,10 @@ DEFINE_string(output_file, ...@@ -186,7 +186,10 @@ DEFINE_string(output_file,
"output file name | output file prefix for multiple outputs"); "output file name | output file prefix for multiple outputs");
DEFINE_string(model_data_file, DEFINE_string(model_data_file,
"", "",
"model data file name, used when EMBED_MODEL_DATA set to 0"); "model data file name, used when EMBED_MODEL_DATA set to 0 or 2");
DEFINE_string(model_file,
"",
"model file name, used when load mace model in pb");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(restart_round, 1, "restart round");
...@@ -228,22 +231,26 @@ bool RunModel(const std::string &model_name, ...@@ -228,22 +231,26 @@ bool RunModel(const std::string &model_name,
MaceStatus create_engine_status; MaceStatus create_engine_status;
// Create Engine // Create Engine
int64_t t0 = NowMicros(); int64_t t0 = NowMicros();
if (FLAGS_model_data_file.empty()) { if (FLAGS_model_file != "") {
std::vector<unsigned char> model_pb_data;
if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
}
create_engine_status = create_engine_status =
CreateMaceEngine(model_name.c_str(), CreateMaceEngineFromProto(model_pb_data,
nullptr, FLAGS_model_data_file,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} else { } else {
create_engine_status = create_engine_status =
CreateMaceEngine(model_name.c_str(), CreateMaceEngineFromCode(model_name,
FLAGS_model_data_file.c_str(), FLAGS_model_data_file,
input_names, input_names,
output_names, output_names,
device_type, device_type,
&engine); &engine);
} }
int64_t t1 = NowMicros(); int64_t t1 = NowMicros();
...@@ -358,6 +365,7 @@ int Main(int argc, char **argv) { ...@@ -358,6 +365,7 @@ int Main(int argc, char **argv) {
LOG(INFO) << "input_file: " << FLAGS_input_file; LOG(INFO) << "input_file: " << FLAGS_input_file;
LOG(INFO) << "output_file: " << FLAGS_output_file; LOG(INFO) << "output_file: " << FLAGS_output_file;
LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; LOG(INFO) << "model_data_file: " << FLAGS_model_data_file;
LOG(INFO) << "model_file: " << FLAGS_model_file;
LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "device: " << FLAGS_device;
LOG(INFO) << "round: " << FLAGS_round; LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_restart_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round;
......
...@@ -28,6 +28,7 @@ cc_library( ...@@ -28,6 +28,7 @@ cc_library(
linkopts = if_android([ linkopts = if_android([
"-llog", "-llog",
]), ]),
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
"//mace/public", "//mace/public",
], ],
...@@ -38,6 +39,7 @@ cc_library( ...@@ -38,6 +39,7 @@ cc_library(
srcs = [ srcs = [
"tuner_development.cc", "tuner_development.cc",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
":utils", ":utils",
], ],
...@@ -48,6 +50,7 @@ cc_library( ...@@ -48,6 +50,7 @@ cc_library(
srcs = [ srcs = [
"tuner_production.cc", "tuner_production.cc",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
deps = [ deps = [
":utils", ":utils",
"//mace/codegen:generated_tuning_params", "//mace/codegen:generated_tuning_params",
...@@ -60,6 +63,7 @@ cc_test( ...@@ -60,6 +63,7 @@ cc_test(
srcs = [ srcs = [
"tuner_test.cc", "tuner_test.cc",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
linkopts = if_android([ linkopts = if_android([
"-pie", "-pie",
"-lm", # Required by unordered_map "-lm", # Required by unordered_map
......
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
#include <utility> #include <utility>
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_types.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/string_util.h" #include "mace/utils/string_util.h"
#include "mace/utils/utils.h"
#undef ERROR #undef ERROR
......
...@@ -15,12 +15,22 @@ ...@@ -15,12 +15,22 @@
#ifndef MACE_UTILS_UTILS_H_ #ifndef MACE_UTILS_UTILS_H_
#define MACE_UTILS_UTILS_H_ #define MACE_UTILS_UTILS_H_
#include <fstream>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
namespace mace { namespace mace {
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname &) = delete; \
classname &operator=(const classname &) = delete
#endif
template <typename Integer> template <typename Integer>
Integer RoundUp(Integer i, Integer factor) { Integer RoundUp(Integer i, Integer factor) {
return (i + factor - 1) / factor * factor; return (i + factor - 1) / factor * factor;
...@@ -121,5 +131,26 @@ inline std::vector<std::string> Split(const std::string &str, char delims) { ...@@ -121,5 +131,26 @@ inline std::vector<std::string> Split(const std::string &str, char delims) {
return result; return result;
} }
inline bool ReadBinaryFile(std::vector<unsigned char> *data,
const std::string &filename) {
std::ifstream ifs(filename, std::ios::in | std::ios::binary);
if (!ifs.is_open()) {
return false;
}
ifs.seekg(0, ifs.end);
size_t length = ifs.tellg();
ifs.seekg(0, ifs.beg);
data->reserve(length);
data->insert(data->begin(), std::istreambuf_iterator<char>(ifs),
std::istreambuf_iterator<char>());
if (ifs.fail()) {
return false;
}
ifs.close();
return true;
}
} // namespace mace } // namespace mace
#endif // MACE_UTILS_UTILS_H_ #endif // MACE_UTILS_UTILS_H_
...@@ -159,6 +159,7 @@ def tuning_run(target_abi, ...@@ -159,6 +159,7 @@ def tuning_run(target_abi,
output_nodes, output_nodes,
input_shapes, input_shapes,
output_shapes, output_shapes,
mace_model_dir,
model_name, model_name,
device_type, device_type,
running_round, running_round,
...@@ -181,6 +182,7 @@ def tuning_run(target_abi, ...@@ -181,6 +182,7 @@ def tuning_run(target_abi,
output_nodes, output_nodes,
input_shapes, input_shapes,
output_shapes, output_shapes,
mace_model_dir,
model_name, model_name,
device_type, device_type,
running_round, running_round,
...@@ -203,12 +205,12 @@ def tuning_run(target_abi, ...@@ -203,12 +205,12 @@ def tuning_run(target_abi,
stdout, target_abi, serialno, model_name, device_type) stdout, target_abi, serialno, model_name, device_type)
def build_mace_run_prod(hexagon_mode, runtime, target_abi, def build_mace_run_prod(hexagon_mode, runtime, target_abi, serialno,
serialno, vlog_level, embed_model_data, vlog_level, embed_model_data, model_load_type,
model_output_dir, input_nodes, output_nodes, model_output_dir, input_nodes, output_nodes,
input_shapes, output_shapes, model_name, device_type, input_shapes, output_shapes, mace_model_dir,
running_round, restart_round, tuning, model_name, device_type, running_round, restart_round,
limit_opencl_kernel_time, phone_data_dir, tuning, limit_opencl_kernel_time, phone_data_dir,
enable_openmp): enable_openmp):
mace_run_target = "//mace/tools/validation:mace_run" mace_run_target = "//mace/tools/validation:mace_run"
strip = "always" strip = "always"
...@@ -226,14 +228,14 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -226,14 +228,14 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, sh_commands.update_mace_run_lib(model_output_dir, model_load_type,
model_name, embed_model_data) model_name, embed_model_data)
device_type = parse_device_type("gpu") device_type = parse_device_type("gpu")
tuning_run(target_abi, serialno, vlog_level, embed_model_data, tuning_run(target_abi, serialno, vlog_level, embed_model_data,
model_output_dir, input_nodes, output_nodes, input_shapes, model_output_dir, input_nodes, output_nodes, input_shapes,
output_shapes, model_name, device_type, running_round=0, output_shapes, mace_model_dir, model_name, device_type,
restart_round=1, out_of_range_check=False, running_round=0, restart_round=1, out_of_range_check=False,
phone_data_dir=phone_data_dir, tuning=tuning, phone_data_dir=phone_data_dir, tuning=tuning,
limit_opencl_kernel_time=limit_opencl_kernel_time) limit_opencl_kernel_time=limit_opencl_kernel_time)
...@@ -248,7 +250,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -248,7 +250,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
debug=debug, debug=debug,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, sh_commands.update_mace_run_lib(model_output_dir, model_load_type,
model_name, embed_model_data) model_name, embed_model_data)
else: else:
gen_opencl_and_tuning_code(target_abi, serialno, [], False) gen_opencl_and_tuning_code(target_abi, serialno, [], False)
...@@ -261,7 +263,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi, ...@@ -261,7 +263,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
debug=debug, debug=debug,
enable_openmp=enable_openmp enable_openmp=enable_openmp
) )
sh_commands.update_mace_run_lib(model_output_dir, sh_commands.update_mace_run_lib(model_output_dir, model_load_type,
model_name, embed_model_data) model_name, embed_model_data)
...@@ -271,17 +273,21 @@ def merge_libs_and_tuning_results(target_soc, ...@@ -271,17 +273,21 @@ def merge_libs_and_tuning_results(target_soc,
project_name, project_name,
output_dir, output_dir,
model_output_dirs, model_output_dirs,
mace_model_dirs_kv,
model_load_type,
hexagon_mode, hexagon_mode,
embed_model_data): embed_model_data):
gen_opencl_and_tuning_code( gen_opencl_and_tuning_code(
target_abi, serialno, model_output_dirs, False) target_abi, serialno, model_output_dirs, False)
sh_commands.build_production_code(target_abi) sh_commands.build_production_code(model_load_type, target_abi)
sh_commands.merge_libs(target_soc, sh_commands.merge_libs(target_soc,
target_abi, target_abi,
project_name, project_name,
output_dir, output_dir,
model_output_dirs, model_output_dirs,
mace_model_dirs_kv,
model_load_type,
hexagon_mode, hexagon_mode,
embed_model_data) embed_model_data)
...@@ -366,6 +372,9 @@ def parse_model_configs(): ...@@ -366,6 +372,9 @@ def parse_model_configs():
print("CONFIG ERROR:") print("CONFIG ERROR:")
print("embed_model_data must be integer in range [0, 1]") print("embed_model_data must be integer in range [0, 1]")
exit(1) exit(1)
elif FLAGS.model_load_type == "pb":
configs["embed_model_data"] = 0
print("emebed_model_data is set 0")
model_names = configs.get("models", "") model_names = configs.get("models", "")
if not model_names: if not model_names:
...@@ -523,6 +532,12 @@ def parse_args(): ...@@ -523,6 +532,12 @@ def parse_args():
type=str, type=str,
default="cpu", default="cpu",
help="validation runtime.") help="validation runtime.")
parser.add_argument(
"--model_load_type",
type=str,
default="source",
help="[source|pb] Load models in generated `source` code" +
"or `pb` file.")
parser.add_argument( parser.add_argument(
"--gpu_data_type", "--gpu_data_type",
type=str, type=str,
...@@ -532,9 +547,11 @@ def parse_args(): ...@@ -532,9 +547,11 @@ def parse_args():
def process_models(project_name, configs, embed_model_data, vlog_level, def process_models(project_name, configs, embed_model_data, vlog_level,
target_abi, phone_data_dir, target_soc="", serialno=""): target_abi, phone_data_dir, model_load_type,
target_soc="", serialno=""):
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
model_output_dirs = [] model_output_dirs = []
mace_model_dirs_kv = {}
for model_name in configs["models"]: for model_name in configs["models"]:
print '===================', model_name, '===================' print '===================', model_name, '==================='
...@@ -550,6 +567,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -550,6 +567,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_output_base_dir = "%s/%s/%s/%s/%s" % ( model_output_base_dir = "%s/%s/%s/%s/%s" % (
FLAGS.output_dir, project_name, "build", FLAGS.output_dir, project_name, "build",
model_name, model_path_digest) model_name, model_path_digest)
if model_load_type == "pb":
mace_model_dir = model_output_base_dir
mace_model_dirs_kv[model_name] = mace_model_dir
else:
mace_model_dir = ""
if target_abi == "host": if target_abi == "host":
model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) model_output_dir = "%s/%s" % (model_output_base_dir, target_abi)
...@@ -587,11 +609,13 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -587,11 +609,13 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
serialno, serialno,
vlog_level, vlog_level,
embed_model_data, embed_model_data,
model_load_type,
model_output_dir, model_output_dir,
model_config["input_nodes"], model_config["input_nodes"],
model_config["output_nodes"], model_config["output_nodes"],
model_config["input_shapes"], model_config["input_shapes"],
model_config["output_shapes"], model_config["output_shapes"],
mace_model_dir,
model_name, model_name,
model_device_type, model_device_type,
FLAGS.round, FLAGS.round,
...@@ -617,6 +641,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -617,6 +641,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config["output_nodes"], model_config["output_nodes"],
model_config["input_shapes"], model_config["input_shapes"],
model_config["output_shapes"], model_config["output_shapes"],
mace_model_dir,
model_name, model_name,
run_device_type, run_device_type,
FLAGS.round, FLAGS.round,
...@@ -636,6 +661,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -636,6 +661,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
vlog_level, vlog_level,
embed_model_data, embed_model_data,
model_output_dir, model_output_dir,
mace_model_dir,
model_config["input_nodes"], model_config["input_nodes"],
model_config["output_nodes"], model_config["output_nodes"],
model_config["input_shapes"], model_config["input_shapes"],
...@@ -672,6 +698,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level, ...@@ -672,6 +698,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
project_name, project_name,
FLAGS.output_dir, FLAGS.output_dir,
model_output_dirs, model_output_dirs,
mace_model_dirs_kv,
model_load_type,
hexagon_mode, hexagon_mode,
embed_model_data) embed_model_data)
...@@ -732,7 +760,8 @@ def main(unused_args): ...@@ -732,7 +760,8 @@ def main(unused_args):
# generate source # generate source
sh_commands.gen_mace_version() sh_commands.gen_mace_version()
sh_commands.gen_encrypted_opencl_source() sh_commands.gen_encrypted_opencl_source()
sh_commands.gen_mace_engine_factory_source(configs['models'].keys()) sh_commands.gen_mace_engine_factory_source(configs['models'].keys(),
FLAGS.model_load_type)
embed_model_data = configs["embed_model_data"] embed_model_data = configs["embed_model_data"]
target_socs = get_target_socs(configs) target_socs = get_target_socs(configs)
...@@ -784,6 +813,8 @@ def main(unused_args): ...@@ -784,6 +813,8 @@ def main(unused_args):
embed_model_data, embed_model_data,
model_config["fast_conv"], model_config["fast_conv"],
model_config["obfuscate"], model_config["obfuscate"],
model_output_base_dir,
FLAGS.model_load_type,
FLAGS.gpu_data_type) FLAGS.gpu_data_type)
for target_abi in configs["target_abis"]: for target_abi in configs["target_abis"]:
...@@ -802,12 +833,14 @@ def main(unused_args): ...@@ -802,12 +833,14 @@ def main(unused_args):
props["ro.product.model"])) props["ro.product.model"]))
process_models(project_name, configs, embed_model_data, process_models(project_name, configs, embed_model_data,
vlog_level, target_abi, phone_data_dir, vlog_level, target_abi, phone_data_dir,
target_soc, serialno) FLAGS.model_load_type, target_soc,
serialno)
else: else:
print("====================================================") print("====================================================")
print("Run on host") print("Run on host")
process_models(project_name, configs, embed_model_data, process_models(project_name, configs, embed_model_data,
vlog_level, target_abi, phone_data_dir) vlog_level, target_abi, phone_data_dir,
FLAGS.model_load_type)
if FLAGS.mode == "build" or FLAGS.mode == "all": if FLAGS.mode == "build" or FLAGS.mode == "all":
sh_commands.packaging_lib(FLAGS.output_dir, project_name) sh_commands.packaging_lib(FLAGS.output_dir, project_name)
......
...@@ -287,9 +287,6 @@ def bazel_build(target, ...@@ -287,9 +287,6 @@ def bazel_build(target,
target, target,
"--copt=-std=c++11", "--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror",
"--copt=-Wextra",
"--copt=-Wno-missing-field-initializers",
"--copt=-O3", "--copt=-O3",
"--define", "--define",
"openmp=%s" % str(enable_openmp).lower(), "openmp=%s" % str(enable_openmp).lower(),
...@@ -316,9 +313,6 @@ def bazel_build(target, ...@@ -316,9 +313,6 @@ def bazel_build(target,
"--cpu=%s" % abi, "--cpu=%s" % abi,
"--copt=-std=c++11", "--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1", "--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror",
"--copt=-Wextra",
"--copt=-Wno-missing-field-initializers",
"--copt=-DMACE_OBFUSCATE_LITERALS", "--copt=-DMACE_OBFUSCATE_LITERALS",
"--copt=-O3", "--copt=-O3",
"--define", "--define",
...@@ -375,7 +369,9 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"): ...@@ -375,7 +369,9 @@ def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
"mace/codegen/opencl/opencl_encrypt_program.cc") "mace/codegen/opencl/opencl_encrypt_program.cc")
def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"): def gen_mace_engine_factory_source(model_tags,
model_load_type,
codegen_path="mace/codegen"):
print("* Genearte mace engine creator source") print("* Genearte mace engine creator source")
codegen_tools_dir = "%s/engine" % codegen_path codegen_tools_dir = "%s/engine" % codegen_path
sh.rm("-rf", codegen_tools_dir) sh.rm("-rf", codegen_tools_dir)
...@@ -383,6 +379,7 @@ def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"): ...@@ -383,6 +379,7 @@ def gen_mace_engine_factory_source(model_tags, codegen_path="mace/codegen"):
gen_mace_engine_factory( gen_mace_engine_factory(
model_tags, model_tags,
"mace/python/tools", "mace/python/tools",
model_load_type,
codegen_tools_dir) codegen_tools_dir)
print("Genearte mace engine creator source done!\n") print("Genearte mace engine creator source done!\n")
...@@ -472,12 +469,16 @@ def gen_model_code(model_codegen_dir, ...@@ -472,12 +469,16 @@ def gen_model_code(model_codegen_dir,
embed_model_data, embed_model_data,
fast_conv, fast_conv,
obfuscate, obfuscate,
model_output_dir,
model_load_type,
gpu_data_type): gpu_data_type):
print("* Genearte model code") print("* Genearte model code")
bazel_build_common("//mace/python/tools:converter") bazel_build_common("//mace/python/tools:converter")
if os.path.exists(model_codegen_dir): if os.path.exists(model_codegen_dir):
sh.rm("-rf", model_codegen_dir) sh.rm("-rf", model_codegen_dir)
sh.mkdir("-p", model_codegen_dir) sh.mkdir("-p", model_codegen_dir)
stdout_buff = [] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
p = sh.python("bazel-bin/mace/python/tools/converter", p = sh.python("bazel-bin/mace/python/tools/converter",
...@@ -486,11 +487,9 @@ def gen_model_code(model_codegen_dir, ...@@ -486,11 +487,9 @@ def gen_model_code(model_codegen_dir,
"--model_file=%s" % model_file_path, "--model_file=%s" % model_file_path,
"--weight_file=%s" % weight_file_path, "--weight_file=%s" % weight_file_path,
"--model_checksum=%s" % model_sha256_checksum, "--model_checksum=%s" % model_sha256_checksum,
"--output=%s" % model_codegen_dir + "/model.cc",
"--input_node=%s" % input_nodes, "--input_node=%s" % input_nodes,
"--output_node=%s" % output_nodes, "--output_node=%s" % output_nodes,
"--runtime=%s" % runtime, "--runtime=%s" % runtime,
"--output_type=source",
"--template=%s" % "mace/python/tools", "--template=%s" % "mace/python/tools",
"--model_tag=%s" % model_tag, "--model_tag=%s" % model_tag,
"--input_shape=%s" % input_shapes, "--input_shape=%s" % input_shapes,
...@@ -498,6 +497,9 @@ def gen_model_code(model_codegen_dir, ...@@ -498,6 +497,9 @@ def gen_model_code(model_codegen_dir,
"--embed_model_data=%s" % embed_model_data, "--embed_model_data=%s" % embed_model_data,
"--winograd=%s" % fast_conv, "--winograd=%s" % fast_conv,
"--obfuscate=%s" % obfuscate, "--obfuscate=%s" % obfuscate,
"--codegen_output=%s/model.cc" % model_codegen_dir,
"--pb_output=%s/%s.pb" % (model_output_dir, model_tag),
"--model_load_type=%s" % model_load_type,
"--gpu_data_type=%s" % gpu_data_type, "--gpu_data_type=%s" % gpu_data_type,
_out=process_output, _out=process_output,
_bg=True, _bg=True,
...@@ -549,6 +551,7 @@ def gen_random_input(model_output_dir, ...@@ -549,6 +551,7 @@ def gen_random_input(model_output_dir,
def update_mace_run_lib(model_output_dir, def update_mace_run_lib(model_output_dir,
model_load_type,
model_tag, model_tag,
embed_model_data): embed_model_data):
mace_run_filepath = model_output_dir + "/mace_run" mace_run_filepath = model_output_dir + "/mace_run"
...@@ -560,8 +563,9 @@ def update_mace_run_lib(model_output_dir, ...@@ -560,8 +563,9 @@ def update_mace_run_lib(model_output_dir,
sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag), sh.cp("-f", "mace/codegen/models/%s/%s.data" % (model_tag, model_tag),
model_output_dir) model_output_dir)
sh.cp("-f", "mace/codegen/models/%s/%s.h" % (model_tag, model_tag), if model_load_type == "source":
model_output_dir) sh.cp("-f", "mace/codegen/models/%s/%s.h" % (model_tag, model_tag),
model_output_dir)
def create_internal_storage_dir(serialno, phone_data_dir): def create_internal_storage_dir(serialno, phone_data_dir):
...@@ -579,6 +583,7 @@ def tuning_run(abi, ...@@ -579,6 +583,7 @@ def tuning_run(abi,
output_nodes, output_nodes,
input_shapes, input_shapes,
output_shapes, output_shapes,
mace_model_dir,
model_tag, model_tag,
device_type, device_type,
running_round, running_round,
...@@ -603,6 +608,10 @@ def tuning_run(abi, ...@@ -603,6 +608,10 @@ def tuning_run(abi,
str(out_of_range_check), omp_num_threads, cpu_affinity_policy, str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
gpu_perf_hint, gpu_priority_hint)) gpu_perf_hint, gpu_priority_hint))
if abi == "host": if abi == "host":
if mace_model_dir:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
else:
mace_model_path = ""
p = subprocess.Popen( p = subprocess.Popen(
[ [
"env", "env",
...@@ -623,6 +632,7 @@ def tuning_run(abi, ...@@ -623,6 +632,7 @@ def tuning_run(abi,
"--cpu_affinity_policy=%s" % cpu_affinity_policy, "--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
], ],
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
stdout=subprocess.PIPE) stdout=subprocess.PIPE)
...@@ -649,6 +659,14 @@ def tuning_run(abi, ...@@ -649,6 +659,14 @@ def tuning_run(abi,
adb_push("mace/third_party/nnlib/libhexagon_controller.so", adb_push("mace/third_party/nnlib/libhexagon_controller.so",
phone_data_dir, serialno) phone_data_dir, serialno)
if mace_model_dir:
mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag)
adb_push("%s/%s.pb" % (mace_model_dir, model_tag),
mace_model_path,
serialno)
else:
mace_model_path = ""
stdout_buff = [] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
adb_cmd = [ adb_cmd = [
...@@ -683,6 +701,7 @@ def tuning_run(abi, ...@@ -683,6 +701,7 @@ def tuning_run(abi,
"--cpu_affinity_policy=%s" % cpu_affinity_policy, "--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
]) ])
adb_cmd = ' '.join(adb_cmd) adb_cmd = ' '.join(adb_cmd)
p = sh.adb( p = sh.adb(
...@@ -820,13 +839,17 @@ def validate_model(abi, ...@@ -820,13 +839,17 @@ def validate_model(abi,
print("Validation done!\n") print("Validation done!\n")
def build_production_code(abi): def build_production_code(model_load_type, abi):
bazel_build("//mace/codegen:generated_opencl", abi=abi) bazel_build("//mace/codegen:generated_opencl", abi=abi)
bazel_build("//mace/codegen:generated_tuning_params", abi=abi) bazel_build("//mace/codegen:generated_tuning_params", abi=abi)
if abi == 'host': if abi == 'host':
bazel_build( if model_load_type == "source":
"//mace/codegen:generated_models", bazel_build(
abi=abi) "//mace/codegen:generated_models",
abi=abi)
else:
bazel_build("//mace/core:core", abi=abi)
bazel_build("//mace/ops:ops", abi=abi)
def merge_libs(target_soc, def merge_libs(target_soc,
...@@ -834,6 +857,8 @@ def merge_libs(target_soc, ...@@ -834,6 +857,8 @@ def merge_libs(target_soc,
project_name, project_name,
libmace_output_dir, libmace_output_dir,
model_output_dirs, model_output_dirs,
mace_model_dirs_kv,
model_load_type,
hexagon_mode, hexagon_mode,
embed_model_data): embed_model_data):
print("* Merge mace lib") print("* Merge mace lib")
...@@ -853,7 +878,20 @@ def merge_libs(target_soc, ...@@ -853,7 +878,20 @@ def merge_libs(target_soc,
if hexagon_mode: if hexagon_mode:
sh.cp("-f", hexagon_lib_file, model_bin_dir) sh.cp("-f", hexagon_lib_file, model_bin_dir)
sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) if model_load_type == "source":
sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir)
for model_output_dir in model_output_dirs:
if not embed_model_data:
sh.cp("-f", glob.glob("%s/*.data" % model_output_dir),
model_data_dir)
if model_load_type == "source":
sh.cp("-f", glob.glob("%s/*.h" % model_output_dir),
model_header_dir)
for model_name in mace_model_dirs_kv:
sh.cp("-f", "%s/%s.pb" % (mace_model_dirs_kv[model_name], model_name),
model_data_dir)
mri_stream = "" mri_stream = ""
if abi == "host": if abi == "host":
...@@ -865,12 +903,24 @@ def merge_libs(target_soc, ...@@ -865,12 +903,24 @@ def merge_libs(target_soc,
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n") "bazel-bin/mace/codegen/libgenerated_tuning_params.pic.a\n")
mri_stream += ( if model_load_type == "source":
"addlib " mri_stream += (
"bazel-bin/mace/codegen/libgenerated_models.pic.a\n") "addlib "
"bazel-bin/mace/codegen/libgenerated_models.pic.a\n")
else:
mri_stream += (
"addlib "
"bazel-bin/mace/core/libcore.pic.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/ops/libops.pic.lo\n")
else: else:
mri_stream += "create %s/libmace_%s.%s.a\n" % \ mri_stream += "create %s/libmace_%s.%s.a\n" % \
(model_bin_dir, project_name, target_soc) (model_bin_dir, project_name, target_soc)
if model_load_type == "source":
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.a\n")
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/codegen/libgenerated_opencl.a\n") "bazel-bin/mace/codegen/libgenerated_opencl.a\n")
...@@ -880,9 +930,6 @@ def merge_libs(target_soc, ...@@ -880,9 +930,6 @@ def merge_libs(target_soc,
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/codegen/libgenerated_version.a\n") "bazel-bin/mace/codegen/libgenerated_version.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/codegen/libgenerated_models.a\n")
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/core/libcore.a\n") "bazel-bin/mace/core/libcore.a\n")
...@@ -895,16 +942,16 @@ def merge_libs(target_soc, ...@@ -895,16 +942,16 @@ def merge_libs(target_soc,
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/utils/libutils_prod.a\n") "bazel-bin/mace/utils/libutils_prod.a\n")
mri_stream += (
"addlib "
"bazel-bin/mace/proto/libmace_cc.a\n")
mri_stream += (
"addlib "
"bazel-bin/external/com_google_protobuf/libprotobuf_lite.a\n")
mri_stream += ( mri_stream += (
"addlib " "addlib "
"bazel-bin/mace/ops/libops.lo\n") "bazel-bin/mace/ops/libops.lo\n")
for model_output_dir in model_output_dirs:
if not embed_model_data:
sh.cp("-f", glob.glob("%s/*.data" % model_output_dir),
model_data_dir)
sh.cp("-f", glob.glob("%s/*.h" % model_output_dir), model_header_dir)
mri_stream += "save\n" mri_stream += "save\n"
mri_stream += "end\n" mri_stream += "end\n"
...@@ -971,6 +1018,7 @@ def benchmark_model(abi, ...@@ -971,6 +1018,7 @@ def benchmark_model(abi,
vlog_level, vlog_level,
embed_model_data, embed_model_data,
model_output_dir, model_output_dir,
mace_model_dir,
input_nodes, input_nodes,
output_nodes, output_nodes,
input_shapes, input_shapes,
...@@ -988,6 +1036,10 @@ def benchmark_model(abi, ...@@ -988,6 +1036,10 @@ def benchmark_model(abi,
stdout_buff = [] stdout_buff = []
process_output = make_output_processor(stdout_buff) process_output = make_output_processor(stdout_buff)
if abi == "host": if abi == "host":
if mace_model_dir:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
else:
mace_model_path = ""
p = subprocess.Popen( p = subprocess.Popen(
[ [
"env", "env",
...@@ -1005,6 +1057,7 @@ def benchmark_model(abi, ...@@ -1005,6 +1057,7 @@ def benchmark_model(abi,
"--cpu_affinity_policy=%s" % cpu_affinity_policy, "--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
]) ])
p.wait() p.wait()
else: else:
...@@ -1022,6 +1075,14 @@ def benchmark_model(abi, ...@@ -1022,6 +1075,14 @@ def benchmark_model(abi,
if not embed_model_data: if not embed_model_data:
adb_push("%s/%s.data" % (model_output_dir, model_tag), adb_push("%s/%s.data" % (model_output_dir, model_tag),
phone_data_dir, serialno) phone_data_dir, serialno)
if mace_model_dir:
mace_model_path = "%s/%s.pb" % (phone_data_dir, model_tag)
adb_push("%s/%s.pb" % (mace_model_dir, model_tag),
mace_model_path,
serialno)
else:
mace_model_path = ""
p = sh.adb( p = sh.adb(
"-s", "-s",
serialno, serialno,
...@@ -1045,6 +1106,7 @@ def benchmark_model(abi, ...@@ -1045,6 +1106,7 @@ def benchmark_model(abi,
"--cpu_affinity_policy=%s" % cpu_affinity_policy, "--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint, "--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint, "--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
_out=process_output, _out=process_output,
_bg=True, _bg=True,
_err_to_out=True) _err_to_out=True)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册