Commit 08a362e7 authored by Y yejianwu

merge with origin master

......@@ -8,6 +8,7 @@ package(
cc_library(
name = "generated_models",
srcs = glob(["models/*/*.cc"]),
hdrs = glob(["models/*/*.h"]),
linkstatic = 1,
deps = [
"//mace/core",
......@@ -33,7 +34,6 @@ cc_library(
linkstatic = 1,
)
cc_library(
name = "generated_version",
srcs = ["version/version.cc"],
......
......@@ -10,6 +10,7 @@
#include <map>
#include "mace/public/mace.h"
#include "mace/public/mace_types.h"
namespace mace {
......
(This diff has been collapsed.)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/public/mace_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace mace {
void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
SetOmpThreadsAndAffinity(omp_num_threads, power_option);
}
}  // namespace mace
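A minimal usage sketch of these two hooks (hint values are illustrative; both calls are typically made once, before the first engine is constructed):

#include "mace/public/mace_runtime.h"

void ConfigureRuntime() {
  // GPU path: ask the driver for high performance but low queue priority.
  mace::ConfigOpenCLRuntime(mace::GPUPerfHint::PERF_HIGH,
                            mace::GPUPriorityHint::PRIORITY_LOW);
  // CPU path: 4 OpenMP threads; HIGH_PERFORMANCE is assumed here to
  // prefer the faster cores.
  mace::ConfigOmpThreadsAndAffinity(4, mace::CPUPowerOption::HIGH_PERFORMANCE);
}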
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <memory>
#include <numeric>
#include "mace/public/mace_types.h"
#include "mace/utils/logging.h"
namespace mace {
ConstTensor::ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const DataType data_type,
uint32_t node_id)
: name_(name),
data_(data),
data_size_(std::accumulate(
dims.begin(), dims.end(), 1, std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(data_type),
node_id_(node_id) {}
ConstTensor::ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const int data_type,
uint32_t node_id)
: name_(name),
data_(data),
data_size_(std::accumulate(
dims.begin(), dims.end(), 1, std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(static_cast<DataType>(data_type)),
node_id_(node_id) {}
const std::string &ConstTensor::name() const { return name_; }
const unsigned char *ConstTensor::data() const { return data_; }
int64_t ConstTensor::data_size() const { return data_size_; }
const std::vector<int64_t> &ConstTensor::dims() const { return dims_; }
DataType ConstTensor::data_type() const { return data_type_; }
uint32_t ConstTensor::node_id() const { return node_id_; }
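// Usage sketch (illustrative, not part of the sources above): ConstTensor
// does not copy the buffer, so the data must outlive the tensor.
//   static const float kWeights[6] = {1, 2, 3, 4, 5, 6};
//   ConstTensor w("conv1_w",
//                 reinterpret_cast<const unsigned char *>(kWeights),
//                 {2, 3}, DT_FLOAT);
//   // w.data_size() == 6 (product of dims), w.data_type() == DT_FLOAT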
Argument::Argument() : has_bits_(0) {}
void Argument::CopyFrom(const Argument &from) {
this->name_ = from.name();
this->f_ = from.f();
this->i_ = from.i();
this->s_ = from.s();
auto floats = from.floats();
this->floats_.resize(floats.size());
std::copy(floats.begin(), floats.end(), this->floats_.begin());
auto ints = from.ints();
this->ints_.resize(ints.size());
std::copy(ints.begin(), ints.end(), this->ints_.begin());
auto strings = from.strings();
this->strings_.resize(strings.size());
std::copy(strings.begin(), strings.end(), this->strings_.begin());
this->has_bits_ = from.has_bits_;
}
const std::string &Argument::name() const { return name_; }
void Argument::set_name(const std::string &value) { name_ = value; }
bool Argument::has_f() const { return (has_bits_ & 0x00000001u) != 0; }
void Argument::set_has_f() { has_bits_ |= 0x00000001u; }
float Argument::f() const { return f_; }
void Argument::set_f(float value) {
set_has_f();
f_ = value;
}
bool Argument::has_i() const { return (has_bits_ & 0x00000002u) != 0; }
void Argument::set_has_i() { has_bits_ |= 0x00000002u; }
int64_t Argument::i() const { return i_; }
void Argument::set_i(int64_t value) {
set_has_i();
i_ = value;
}
bool Argument::has_s() const { return (has_bits_ & 0x00000004u) != 0; }
void Argument::set_has_s() { has_bits_ |= 0x00000004u; }
std::string Argument::s() const { return s_; }
void Argument::set_s(const std::string &value) {
set_has_s();
s_ = value;
}
const std::vector<float> &Argument::floats() const { return floats_; }
void Argument::add_floats(float value) { floats_.push_back(value); }
void Argument::set_floats(const std::vector<float> &value) {
floats_.resize(value.size());
std::copy(value.begin(), value.end(), floats_.begin());
}
const std::vector<int64_t> &Argument::ints() const { return ints_; }
void Argument::add_ints(int64_t value) { ints_.push_back(value); }
void Argument::set_ints(const std::vector<int64_t> &value) {
ints_.resize(value.size());
std::copy(value.begin(), value.end(), ints_.begin());
}
const std::vector<std::string> &Argument::strings() const { return strings_; }
void Argument::add_strings(const ::std::string &value) {
strings_.push_back(value);
}
void Argument::set_strings(const std::vector<std::string> &value) {
strings_.resize(value.size());
std::copy(value.begin(), value.end(), strings_.begin());
}
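// Usage sketch (illustrative): has_bits_ provides proto2-style presence
// tracking for the scalar fields; repeated fields are plain vectors.
//   Argument arg;
//   arg.set_name("strides");
//   arg.set_ints({1, 2, 2, 1});
//   arg.has_i();  // false: the bit is only set by set_i()
//   arg.set_i(1);
//   arg.has_i();  // true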
// Node Input
NodeInput::NodeInput(int node_id, int output_port)
: node_id_(node_id), output_port_(output_port) {}
void NodeInput::CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
}
int NodeInput::node_id() const { return node_id_; }
void NodeInput::set_node_id(int node_id) { node_id_ = node_id; }
int NodeInput::output_port() const { return output_port_; }
void NodeInput::set_output_port(int output_port) { output_port_ = output_port; }
// OutputShape
OutputShape::OutputShape() {}
OutputShape::OutputShape(const std::vector<int64_t> &dims)
: dims_(dims.begin(), dims.end()) {}
void OutputShape::CopyFrom(const OutputShape &from) {
auto from_dims = from.dims();
dims_.resize(from_dims.size());
std::copy(from_dims.begin(), from_dims.end(), dims_.begin());
}
const std::vector<int64_t> &OutputShape::dims() const { return dims_; }
// Operator Def
void OperatorDef::CopyFrom(const OperatorDef &from) {
name_ = from.name();
type_ = from.type();
auto from_input = from.input();
input_.resize(from_input.size());
std::copy(from_input.begin(), from_input.end(), input_.begin());
auto from_output = from.output();
output_.resize(from_output.size());
std::copy(from_output.begin(), from_output.end(), output_.begin());
auto from_arg = from.arg();
arg_.resize(from_arg.size());
for (int i = 0; i < from_arg.size(); ++i) {
arg_[i].CopyFrom(from_arg[i]);
}
auto from_output_shape = from.output_shape();
output_shape_.resize(from_output_shape.size());
for (int i = 0; i < from_output_shape.size(); ++i) {
output_shape_[i].CopyFrom(from_output_shape[i]);
}
auto from_data_type = from.output_type();
output_type_.resize(from_data_type.size());
std::copy(from_data_type.begin(), from_data_type.end(), output_type_.begin());
auto mem_ids = from.mem_id();
mem_id_.resize(mem_ids.size());
std::copy(mem_ids.begin(), mem_ids.end(), mem_id_.begin());
// nnlib
node_id_ = from.node_id();
op_id_ = from.op_id();
padding_ = from.padding();
auto from_node_input = from.node_input();
node_input_.resize(from_node_input.size());
for (int i = 0; i < from_node_input.size(); ++i) {
node_input_[i].CopyFrom(from_node_input[i]);
}
auto from_out_max_byte_size = from.out_max_byte_size();
out_max_byte_size_.resize(from_out_max_byte_size.size());
std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(),
out_max_byte_size_.begin());
has_bits_ = from.has_bits_;
}
const std::string &OperatorDef::name() const { return name_; }
void OperatorDef::set_name(const std::string &name_) {
set_has_name();
OperatorDef::name_ = name_;
}
bool OperatorDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; }
void OperatorDef::set_has_name() { has_bits_ |= 0x00000001u; }
const std::string &OperatorDef::type() const { return type_; }
void OperatorDef::set_type(const std::string &type_) {
set_has_type();
OperatorDef::type_ = type_;
}
bool OperatorDef::has_type() const { return (has_bits_ & 0x00000002u) != 0; }
void OperatorDef::set_has_type() { has_bits_ |= 0x00000002u; }
const std::vector<int> &OperatorDef::mem_id() const { return mem_id_; }
void OperatorDef::set_mem_id(const std::vector<int> &value) {
mem_id_.resize(value.size());
std::copy(value.begin(), value.end(), mem_id_.begin());
}
uint32_t OperatorDef::node_id() const { return node_id_; }
void OperatorDef::set_node_id(uint32_t node_id) { node_id_ = node_id; }
uint32_t OperatorDef::op_id() const { return op_id_; }
uint32_t OperatorDef::padding() const { return padding_; }
void OperatorDef::set_padding(uint32_t padding) { padding_ = padding; }
const std::vector<NodeInput> &OperatorDef::node_input() const {
return node_input_;
}
void OperatorDef::add_node_input(const NodeInput &value) {
node_input_.push_back(value);
}
const std::vector<int> &OperatorDef::out_max_byte_size() const {
return out_max_byte_size_;
}
void OperatorDef::add_out_max_byte_size(int value) {
out_max_byte_size_.push_back(value);
}
const std::vector<std::string> &OperatorDef::input() const { return input_; }
const std::string &OperatorDef::input(int index) const {
MACE_CHECK(0 <= index && index < static_cast<int>(input_.size()));
return input_[index];
}
std::string *OperatorDef::add_input() {
input_.push_back("");
return &input_.back();
}
void OperatorDef::add_input(const ::std::string &value) {
input_.push_back(value);
}
void OperatorDef::add_input(::std::string &&value) { input_.push_back(value); }
void OperatorDef::set_input(const std::vector<std::string> &value) {
input_.resize(value.size());
std::copy(value.begin(), value.end(), input_.begin());
}
const std::vector<std::string> &OperatorDef::output() const { return output_; }
const std::string &OperatorDef::output(int index) const {
MACE_CHECK(0 <= index && index < static_cast<int>(output_.size()));
return output_[index];
}
std::string *OperatorDef::add_output() {
output_.push_back("");
return &output_.back();
}
void OperatorDef::add_output(const ::std::string &value) {
output_.push_back(value);
}
void OperatorDef::add_output(::std::string &&value) {
output_.push_back(value);
}
void OperatorDef::set_output(const std::vector<std::string> &value) {
output_.resize(value.size());
std::copy(value.begin(), value.end(), output_.begin());
}
const std::vector<Argument> &OperatorDef::arg() const { return arg_; }
Argument *OperatorDef::add_arg() {
arg_.emplace_back(Argument());
return &arg_.back();
}
const std::vector<OutputShape> &OperatorDef::output_shape() const {
return output_shape_;
}
void OperatorDef::add_output_shape(const OutputShape &value) {
output_shape_.push_back(value);
}
const std::vector<DataType> &OperatorDef::output_type() const {
return output_type_;
}
void OperatorDef::set_output_type(const std::vector<DataType> &value) {
output_type_.resize(value.size());
std::copy(value.begin(), value.end(), output_type_.begin());
}
// MemoryBlock
MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y)
: mem_id_(mem_id), x_(x), y_(y) {}
int MemoryBlock::mem_id() const { return mem_id_; }
uint32_t MemoryBlock::x() const { return x_; }
uint32_t MemoryBlock::y() const { return y_; }
// MemoryArena
const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
return mem_block_;
}
std::vector<MemoryBlock> &MemoryArena::mutable_mem_block() {
return mem_block_;
}
int MemoryArena::mem_block_size() const { return mem_block_.size(); }
// InputInfo
const std::string &InputInfo::name() const { return name_; }
int32_t InputInfo::node_id() const { return node_id_; }
int32_t InputInfo::max_byte_size() const { return max_byte_size_; }
DataType InputInfo::data_type() const { return data_type_; }
const std::vector<int32_t> &InputInfo::dims() const { return dims_; }
// OutputInfo
const std::string &OutputInfo::name() const { return name_; }
int32_t OutputInfo::node_id() const { return node_id_; }
int32_t OutputInfo::max_byte_size() const { return max_byte_size_; }
DataType OutputInfo::data_type() const { return data_type_; }
void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; }
const std::vector<int32_t> &OutputInfo::dims() const { return dims_; }
void OutputInfo::set_dims(const std::vector<int32_t> &dims) { dims_ = dims; }
// NetDef
NetDef::NetDef() : has_bits_(0) {}
const std::string &NetDef::name() const { return name_; }
void NetDef::set_name(const std::string &value) {
set_has_name();
name_ = value;
}
bool NetDef::has_name() const { return (has_bits_ & 0x00000001u) != 0; }
void NetDef::set_has_name() { has_bits_ |= 0x00000001u; }
const std::string &NetDef::version() const { return version_; }
void NetDef::set_version(const std::string &value) {
set_has_version();
version_ = value;
}
bool NetDef::has_version() const { return (has_bits_ & 0x00000002u) != 0; }
void NetDef::set_has_version() { has_bits_ |= 0x00000002u; }
const std::vector<OperatorDef> &NetDef::op() const { return op_; }
OperatorDef *NetDef::add_op() {
op_.emplace_back(OperatorDef());
return &op_.back();
}
std::vector<OperatorDef> &NetDef::mutable_op() { return op_; }
const std::vector<Argument> &NetDef::arg() const { return arg_; }
Argument *NetDef::add_arg() {
arg_.emplace_back(Argument());
return &arg_.back();
}
std::vector<Argument> &NetDef::mutable_arg() { return arg_; }
const std::vector<ConstTensor> &NetDef::tensors() const { return tensors_; }
std::vector<ConstTensor> &NetDef::mutable_tensors() { return tensors_; }
const MemoryArena &NetDef::mem_arena() const { return mem_arena_; }
MemoryArena &NetDef::mutable_mem_arena() {
set_has_mem_arena();
return mem_arena_;
}
bool NetDef::has_mem_arena() const { return (has_bits_ & 0x00000004u) != 0; }
void NetDef::set_has_mem_arena() { has_bits_ |= 0x00000004u; }
const std::vector<InputInfo> &NetDef::input_info() const { return input_info_; }
const std::vector<OutputInfo> &NetDef::output_info() const {
return output_info_;
}
std::vector<OutputInfo> &NetDef::mutable_output_info() { return output_info_; }
int NetDef::op_size() const { return op_.size(); }
const OperatorDef &NetDef::op(const int idx) const {
MACE_CHECK(0 <= idx && idx < op_size());
return op_[idx];
}
}  // namespace mace
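For orientation, the builders above compose like this hypothetical single-op net (all names are illustrative):

mace::NetDef net;
net.set_name("demo_net");
mace::OperatorDef *op = net.add_op();
op->set_name("conv1");
op->set_type("Conv2D");
op->add_input("input:0");
op->add_input("conv1_w:0");
op->add_output("conv1:0");
mace::Argument *strides = op->add_arg();
strides->set_name("strides");
strides->set_ints({1, 1});
// net.op_size() == 1, net.op(0).type() == "Conv2D"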
......@@ -16,6 +16,7 @@
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/public/mace.h"
#include "mace/public/mace_types.h"
namespace mace {
......
......@@ -6,7 +6,7 @@
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
namespace mace {
......
......@@ -14,6 +14,7 @@
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/timer.h"
namespace mace {
......
......@@ -8,7 +8,7 @@
#include <cstdint>
#include <string>
#include "mace/public/mace.h"
#include "mace/public/mace_types.h"
#include "include/half.hpp"
namespace mace {
......
......@@ -23,9 +23,11 @@
#include "gflags/gflags.h"
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
// #include "mace/codegen/models/${MACE_MODEL_TAG}/${MACE_MODEL_TAG}.h" instead
namespace mace {
namespace MACE_MODEL_TAG {
......@@ -145,19 +147,26 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
return curr;
}
DEFINE_string(input_node, "input_node0,input_node1",
DEFINE_string(input_node,
"input_node0,input_node1",
"input nodes, separated by comma");
DEFINE_string(input_shape, "1,224,224,3:1,1,1,10",
DEFINE_string(input_shape,
"1,224,224,3:1,1,1,10",
"input shapes, separated by colon and comma");
DEFINE_string(output_node, "output_node0,output_node1",
DEFINE_string(output_node,
"output_node0,output_node1",
"output nodes, separated by comma");
DEFINE_string(output_shape, "1,224,224,2:1,1,1,10",
DEFINE_string(output_shape,
"1,224,224,2:1,1,1,10",
"output shapes, separated by colon and comma");
DEFINE_string(input_file, "",
DEFINE_string(input_file,
"",
"input file name | input file prefix for multiple inputs.");
DEFINE_string(output_file, "",
DEFINE_string(output_file,
"",
"output file name | output file prefix for multiple outputs");
DEFINE_string(model_data_file, "",
DEFINE_string(model_data_file,
"",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round");
......@@ -166,115 +175,14 @@ DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
DEFINE_int32(cpu_power_option,
             0,
             "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
const std::vector<int64_t> &output_shape) {
// load model
int64_t t0 = NowMicros();
const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
int64_t t1 = NowMicros();
LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us";
int64_t init_micros = t1 - t0;
DeviceType device_type = ParseDeviceType(FLAGS_device);
LOG(INFO) << "Runing with device type: " << device_type;
// config runtime
if (device_type == DeviceType::OPENCL) {
mace::ConfigOpenCLRuntime(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == DeviceType::CPU) {
mace::ConfigOmpThreadsAndAffinity(
FLAGS_omp_num_threads,
static_cast<CPUPowerOption>(FLAGS_cpu_power_option));
}
// Init model
LOG(INFO) << "Run init";
t0 = NowMicros();
mace::MaceEngine engine(&net_def, device_type);
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
t1 = NowMicros();
init_micros += t1 - t0;
LOG(INFO) << "Net init latency: " << t1 - t0 << " us";
LOG(INFO) << "Total init latency: " << init_micros << " us";
// Allocate input and output
int64_t input_size =
std::accumulate(input_shape.begin(), input_shape.end(), 1,
std::multiplies<int64_t>());
int64_t output_size =
std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int64_t>());
std::unique_ptr<float[]> input_data(new float[input_size]);
std::unique_ptr<float[]> output_data(new float[output_size]);
// load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(FLAGS_input_node),
std::ios::in | std::ios::binary);
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(input_data.get()),
input_size * sizeof(float));
in_file.close();
} else {
LOG(INFO) << "Open input file failed";
return -1;
}
LOG(INFO) << "Warm up run";
t0 = NowMicros();
engine.Run(input_data.get(), input_shape, output_data.get());
t1 = NowMicros();
LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
t0 = NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(input_data.get(), input_shape, output_data.get());
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
}
}
t1 = NowMicros();
LOG(INFO) << "Average latency: " << (t1 - t0) / FLAGS_round << " us";
}
if (FLAGS_restart_round == 1) {
if (output_data != nullptr) {
std::string output_name =
    FLAGS_output_file + "_" + FormatName(FLAGS_output_node);
std::ofstream out_file(output_name, std::ios::binary);
out_file.write((const char *) (output_data.get()),
output_size * sizeof(float));
out_file.flush();
out_file.close();
LOG(INFO) << "Write output file "
<< output_name
<< " with size " << output_size
<< " done.";
} else {
LOG(INFO) << "Output data is null";
}
}
return true;
}
bool RunModel(const std::vector<std::string> &input_names,
              const std::vector<std::vector<int64_t>> &input_shapes,
              const std::vector<std::string> &output_names,
              const std::vector<std::vector<int64_t>> &output_shapes) {
// load model
int64_t t0 = NowMicros();
const unsigned char *model_data =
......@@ -312,42 +220,42 @@ bool MultipleInputOrOutput(
const size_t input_count = input_names.size();
const size_t output_count = output_names.size();
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) {
// Allocate input and output
int64_t input_size =
std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size],
                                        std::default_delete<float[]>());
// load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary);
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(buffer_in.get()),
input_size * sizeof(float));
in_file.close();
} else {
LOG(INFO) << "Open input file failed";
return -1;
}
inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in);
}
for (size_t i = 0; i < output_count; ++i) {
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<float>(new float[output_size],
                                         std::default_delete<float[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
}
LOG(INFO) << "Warm up run";
t0 = NowMicros();
engine.Run(inputs, &outputs);
t1 = NowMicros();
LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
......@@ -356,7 +264,7 @@ bool MultipleInputOrOutput(
t0 = NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(inputs, &outputs);
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
......@@ -367,20 +275,19 @@ bool MultipleInputOrOutput(
}
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
    FLAGS_output_file + "_" + FormatName(output_names[i]);
std::ofstream out_file(output_name, std::ios::binary);
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
std::multiplies<int64_t>());
out_file.write(
    reinterpret_cast<char *>(outputs[output_names[i]].data().get()),
    output_size * sizeof(float));
out_file.flush();
out_file.close();
LOG(INFO) << "Write output file "
<< output_name
<< " with size " << output_size
<< " done.";
LOG(INFO) << "Write output file " << output_name << " with size "
<< output_size << " done.";
}
return true;
......@@ -391,7 +298,6 @@ int Main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "mace git version: " << MaceGitVersion();
LOG(INFO) << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum();
LOG(INFO) << "input node: " << FLAGS_input_node;
LOG(INFO) << "input shape: " << FLAGS_input_shape;
......@@ -431,14 +337,8 @@ int Main(int argc, char **argv) {
#pragma omp parallel for
for (int i = 0; i < FLAGS_restart_round; ++i) {
VLOG(0) << "restart round " << i;
ret =
    RunModel(input_names, input_shape_vec, output_names, output_shape_vec);
}
if (ret) {
return 0;
......
......@@ -95,7 +95,12 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
static_cast<uint32_t>(buffer->buffer_offset() /
GetEnumTypeSize(buffer->dtype())));
}
if (type == CONV2D_FILTER) {
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(0)));
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(1)));
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(2)));
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(3)));
} else if (type == ARGUMENT) {
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(0)));
} else if (type == WEIGHT_HEIGHT || type == WEIGHT_WIDTH) {
b2f_kernel.setArg(idx++, static_cast<uint32_t>(buffer->dim(0)));
......
......@@ -2,6 +2,7 @@
__kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, oc, ic */
__private const int input_offset,
__private const int filter_h,
__private const int filter_w,
__private const int out_channel,
__private const int in_channel,
......@@ -22,16 +23,18 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o
}
#endif
const int in_channel_idx = w;
const int hw_size = filter_w * filter_h;
const int out_channel_idx = h / hw_size * 4;
const int hw_idx = h % hw_size;
const int h_idx = hw_idx / filter_w;
const int w_idx = hw_idx % filter_w;
const int offset = input_offset
                   + ((h_idx * filter_w + w_idx) * out_channel
                      + out_channel_idx) * in_channel
                   + in_channel_idx;
DATA_TYPE4 values = 0;
if (out_channel_idx < out_channel) {
const int size = out_channel - out_channel_idx;
if (size < 4) {
......@@ -52,10 +55,11 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o
}
int2 coord = (int2)(w, h);
WRITE_IMAGET(output, coord, values);
}
__kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic */
__private const int filter_h,
__private const int filter_w,
__private const int out_channel,
__private const int in_channel,
......@@ -76,18 +80,19 @@ __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic
}
#endif
const int in_channel_idx = w;
const int hw_size = filter_w * filter_h;
const int out_channel_idx = h / hw_size * 4;
const int hw_idx = h % hw_size;
const int h_idx = hw_idx / filter_w;
const int w_idx = hw_idx % filter_w;
const int offset = ((h_idx * filter_w + w_idx) * out_channel
                    + out_channel_idx) * in_channel
                   + in_channel_idx;
if (out_channel_idx < out_channel) {
int2 coord = (int2)(w, h);
DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord);
const int size = (out_channel - out_channel_idx);
if (size < 4) {
switch (size) {
......@@ -200,7 +205,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
+ channel_idx;
const int size = channels - channel_idx;
DATA_TYPE4 values = 0;
if (size < 4) {
switch(size) {
case 3:
......@@ -214,7 +219,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
values = vload4(0, input + offset);
}
int2 coord = (int2)(w, h);
WRITE_IMAGET(output, coord, values);
}
__kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
......@@ -246,7 +251,7 @@ __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
+ channel_idx;
int2 coord = (int2)(w, h);
DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord);
const int size = channels - channel_idx;
if (size < 4) {
switch (size) {
......@@ -286,7 +291,7 @@ __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
const int size = count - w * 4;
DATA_TYPE4 values = 0;
if (size < 4) {
switch(size) {
case 3:
......@@ -300,7 +305,7 @@ __kernel void arg_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
values = vload4(0, input + offset);
}
int2 coord = (int2)(w, h);
WRITE_IMAGET(output, coord, values);
}
__kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
......@@ -325,7 +330,7 @@ __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
const int offset = w * 4;
int2 coord = (int2)(w, h);
DATA_TYPE4 values = READ_IMAGET(input, SAMPLER, coord);
const int size = count - offset;
if (size < 4) {
switch (size) {
......
#include <common.h>
__kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__read_only image2d_t filter, /* cout%4 * cin, kh * kw * cout/4 */
#ifdef BIAS
__read_only image2d_t bias, /* cout%4 * cout/4 */
#endif
......@@ -41,8 +41,6 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
const int out_w_blks = get_global_size(1);
#endif
#ifdef BIAS
DATA_TYPE4 out0 =
READ_IMAGET(bias, SAMPLER, (int2)(out_ch_blk, 0));
......@@ -64,21 +62,21 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
const int height_idx = mad24((out_hb % out_height), stride, -padding_top);
const int batch_idx = mul24((out_hb / out_height), in_height);
const int filter_hw = mul24(filter_width, filter_height);
DATA_TYPE4 in0, in1, in2, in3;
DATA_TYPE4 weights0, weights1, weights2, weights3;
for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) {
const int in_idx = mul24(in_ch_blk, in_width);
int filter_x_idx = in_ch_blk << 2;
int filter_y_idx = mul24(out_ch_blk, filter_hw);
for (short hb_idx = 0; hb_idx < filter_height; ++hb_idx) {
// TODO(heliangliang) optimize out these muls
int in_hb_value = height_idx + mul24(hb_idx, dilation_h);
in_hb_value = select(in_hb_value + batch_idx,
-1,
(in_hb_value < 0 || in_hb_value >= in_height));
#pragma unroll
for (short width_idx = 0; width_idx < filter_width; ++width_idx) {
int in_width_value;
#define READ_INPUT(i) \
......@@ -96,11 +94,10 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
#undef READ_INPUT
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 3, filter_y_idx));
out0 = mad(in0.x, weights0, out0);
out0 = mad(in0.y, weights1, out0);
......@@ -123,9 +120,8 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
out3 = mad(in3.z, weights2, out3);
out3 = mad(in3.w, weights3, out3);
filter_y_idx += 1;
}
}
}
......
#include <common.h>
__kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__read_only image2d_t filter, /* cout%4 * cin, kh * kw * cout/4 */
#ifdef BIAS
__read_only image2d_t bias, /* cout%4 * cout/4 */
#endif
......@@ -39,8 +39,6 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const int out_w_blks = get_global_size(1);
#endif
#ifdef BIAS
DATA_TYPE4 out0 =
READ_IMAGET(bias, SAMPLER, (int2)(out_ch_blk, 0));
......@@ -65,19 +63,18 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const int height_idx = mad24((out_hb % out_height), stride, -padding_top);
const int batch_idx = mul24((out_hb / out_height), in_height);
DATA_TYPE4 in0, in1, in2, in3, in4;
DATA_TYPE4 weights0, weights1, weights2, weights3;
for (short in_ch_blk = 0; in_ch_blk < in_ch_blks; ++in_ch_blk) {
const int in_idx = mul24(in_ch_blk, in_width);
int filter_x_idx = in_ch_blk << 2;
int filter_y_idx = mul24(out_ch_blk, 9);
int in_hb_idx = height_idx;
for (short hb_idx = 0; hb_idx < 3; ++hb_idx) {
int in_hb_value = select(in_hb_idx + batch_idx,
-1,
(in_hb_idx < 0 || in_hb_idx >= in_height));
int in_width_idx = 0;
for (short width_idx = 0; width_idx < 3; ++width_idx) {
int in_width_value;
......@@ -97,11 +94,10 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
#undef READ_INPUT
weights0 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 0, filter_y_idx));
weights1 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 1, filter_y_idx));
weights2 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 2, filter_y_idx));
weights3 = READ_IMAGET(filter, SAMPLER, (int2)(filter_x_idx + 3, filter_y_idx));
out0 = mad(in0.x, weights0, out0);
out0 = mad(in0.y, weights1, out0);
......@@ -129,10 +125,9 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
out4 = mad(in4.z, weights2, out4);
out4 = mad(in4.w, weights3, out4);
in_width_idx += dilation_w;
filter_y_idx += 1;
}
in_hb_idx += dilation_h;
}
}
......
......@@ -23,13 +23,13 @@ void CalInOutputImageShape(const std::vector<index_t> &shape, /* NHWC */
(*image_shape)[1] = shape[0] * shape[1];
}
// [RoundUp<4>(Ic), H * W * (Oc + 3) / 4]
void CalConv2dFilterImageShape(const std::vector<index_t> &shape, /* HWOI */
std::vector<size_t> *image_shape) {
MACE_CHECK(shape.size() == 4);
image_shape->resize(2);
(*image_shape)[0] = RoundUp<index_t>(shape[3], 4);
(*image_shape)[1] = shape[0] * shape[1] * RoundUpDiv4(shape[2]);
}
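// Worked example (illustrative): an HWOI filter of shape [3, 3, 64, 32]
// (Kh = Kw = 3, Oc = 64, Ic = 32) maps to
//   image_shape[0] = RoundUp<4>(32)          = 32
//   image_shape[1] = 3 * 3 * RoundUpDiv4(64) = 9 * 16 = 144
// while the old layout produced [3 * 3 * RoundUp<4>(32), RoundUpDiv4(64)]
// = [288, 16].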
// [H * W * M, (Ic + 3) / 4]
......
......@@ -114,6 +114,7 @@ static void Conv2d(int iters,
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, OPENCL); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, OPENCL);
BM_CONV_2D(1, 256, 64, 64, 3, 3, 1, 1, VALID, 256);
BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, 1, VALID, 1024);
......@@ -135,6 +136,8 @@ BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, 1, SAME, 128);
BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, 1, SAME, 128);
BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, 1, SAME, 128);
BM_CONV_2D(1, 1024, 16, 16, 15, 1, 1, 1, SAME, 2);
// Dilation
BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 2, VALID, 32);
BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 4, VALID, 32);
......
......@@ -7,11 +7,11 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
cc_library(
name = "public",
hdrs = [
"mace.h",
"mace_runtime.h",
"mace_types.h",
],
)
......@@ -2,6 +2,9 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
// This file defines core MACE APIs.
// These APIs will be stable and backward compatible.
#ifndef MACE_PUBLIC_MACE_H_
#define MACE_PUBLIC_MACE_H_
......@@ -13,415 +16,60 @@
namespace mace {
const char *MaceVersion();
extern const char *MaceGitVersion();

enum DeviceType { CPU = 0, NEON = 1, OPENCL = 2, HEXAGON = 3 };

enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 };

// MACE input/output tensor
class MaceTensor {
 public:
  // shape - the shape of the tensor, with size n
  // data - the buffer of the tensor, must not be null with size equals
  //        shape[0] * shape[1] * ... * shape[n-1]
  explicit MaceTensor(const std::vector<int64_t> &shape,
                      std::shared_ptr<float> data);
  MaceTensor();
  MaceTensor(const MaceTensor &other);
  MaceTensor(const MaceTensor &&other);
  MaceTensor &operator=(const MaceTensor &other);
  MaceTensor &operator=(const MaceTensor &&other);
  ~MaceTensor();

  const std::vector<int64_t> &shape() const;
  const std::shared_ptr<float> data() const;
  std::shared_ptr<float> data();

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

class NetDef;
class RunMetadata;

class MaceEngine {
 public:
  // Single input and output
  explicit MaceEngine(const NetDef *net_def, DeviceType device_type);
  // Multiple input or output
  explicit MaceEngine(const NetDef *net_def,
                      DeviceType device_type,
                      const std::vector<std::string> &input_nodes,
                      const std::vector<std::string> &output_nodes);
  ~MaceEngine();

  MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
                 std::map<std::string, MaceTensor> *outputs);
  MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
                 std::map<std::string, MaceTensor> *outputs,
                 RunMetadata *run_metadata);

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;

  MaceEngine(const MaceEngine &) = delete;
  MaceEngine &operator=(const MaceEngine &) = delete;
};
} // namespace mace
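To make the new contract concrete, a minimal end-to-end sketch of the map-based API (node names, shapes, and the NetDef source are illustrative; error handling elided):

#include <map>
#include <memory>
#include <string>
#include <vector>
#include "mace/public/mace.h"

mace::MaceStatus RunOnce(const mace::NetDef *net_def) {
  mace::MaceEngine engine(net_def, mace::DeviceType::OPENCL,
                          {"input"}, {"output"});
  // Buffers are shared_ptr<float> with array deleters, as in mace_run.cc.
  auto in_buf = std::shared_ptr<float>(new float[1 * 224 * 224 * 3],
                                       std::default_delete<float[]>());
  auto out_buf = std::shared_ptr<float>(new float[1 * 1000],
                                        std::default_delete<float[]>());
  std::map<std::string, mace::MaceTensor> inputs;
  std::map<std::string, mace::MaceTensor> outputs;
  inputs["input"] = mace::MaceTensor({1, 224, 224, 3}, in_buf);
  outputs["output"] = mace::MaceTensor({1, 1000}, out_buf);
  return engine.Run(inputs, &outputs);
}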
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
// This file defines runtime tuning APIs.
// These APIs are not stable.
#ifndef MACE_PUBLIC_MACE_RUNTIME_H_
#define MACE_PUBLIC_MACE_RUNTIME_H_
namespace mace {
enum GPUPerfHint {
PERF_DEFAULT = 0,
PERF_LOW = 1,
PERF_NORMAL = 2,
PERF_HIGH = 3
};
enum GPUPriorityHint {
PRIORITY_DEFAULT = 0,
PRIORITY_LOW = 1,
PRIORITY_NORMAL = 2,
PRIORITY_HIGH = 3
};
enum CPUPowerOption { DEFAULT = 0, HIGH_PERFORMANCE = 1, BATTERY_SAVE = 2 };
void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint);
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
} // namespace mace
#endif // MACE_PUBLIC_MACE_RUNTIME_H_
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
// This file defines data types used by net creation and benchmark tools.
// These APIs are not stable and should only be used by advanced users.
#ifndef MACE_PUBLIC_MACE_TYPES_H_
#define MACE_PUBLIC_MACE_TYPES_H_
#include <string>
#include <vector>
namespace mace {
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname &) = delete; \
classname &operator=(const classname &) = delete
#endif
enum NetMode { INIT = 0, NORMAL = 1 };
enum DataType {
DT_INVALID = 0,
DT_FLOAT = 1,
DT_DOUBLE = 2,
DT_INT32 = 3,
DT_UINT8 = 4,
DT_INT16 = 5,
DT_INT8 = 6,
DT_STRING = 7,
DT_INT64 = 8,
DT_UINT16 = 9,
DT_BOOL = 10,
DT_HALF = 19,
DT_UINT32 = 22
};
class ConstTensor {
public:
ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const DataType data_type = DT_FLOAT,
uint32_t node_id = 0);
ConstTensor(const std::string &name,
const unsigned char *data,
const std::vector<int64_t> &dims,
const int data_type,
uint32_t node_id = 0);
const std::string &name() const;
const unsigned char *data() const;
int64_t data_size() const;
const std::vector<int64_t> &dims() const;
DataType data_type() const;
uint32_t node_id() const;
private:
const std::string name_;
const unsigned char *data_;
const int64_t data_size_;
const std::vector<int64_t> dims_;
const DataType data_type_;
const uint32_t node_id_;
};
class Argument {
public:
Argument();
void CopyFrom(const Argument &from);
public:
const std::string &name() const;
void set_name(const std::string &value);
bool has_f() const;
float f() const;
void set_f(float value);
bool has_i() const;
int64_t i() const;
void set_i(int64_t value);
bool has_s() const;
std::string s() const;
void set_s(const std::string &value);
const std::vector<float> &floats() const;
void add_floats(float value);
void set_floats(const std::vector<float> &value);
const std::vector<int64_t> &ints() const;
void add_ints(int64_t value);
void set_ints(const std::vector<int64_t> &value);
const std::vector<std::string> &strings() const;
void add_strings(const ::std::string &value);
void set_strings(const std::vector<std::string> &value);
private:
void set_has_f();
void set_has_i();
void set_has_s();
private:
std::string name_;
float f_;
int64_t i_;
std::string s_;
std::vector<float> floats_;
std::vector<int64_t> ints_;
std::vector<std::string> strings_;
uint32_t has_bits_;
};
class NodeInput {
public:
NodeInput() {}
NodeInput(int node_id, int output_port);
void CopyFrom(const NodeInput &from);
public:
int node_id() const;
void set_node_id(int node_id);
int output_port() const;
void set_output_port(int output_port);
private:
int node_id_;
int output_port_;
};
class OutputShape {
public:
OutputShape();
OutputShape(const std::vector<int64_t> &dims); // NOLINT(runtime/explicit)
void CopyFrom(const OutputShape &from);
public:
const std::vector<int64_t> &dims() const;
private:
std::vector<int64_t> dims_;
};
class OperatorDef {
public:
void CopyFrom(const OperatorDef &from);
public:
const std::string &name() const;
void set_name(const std::string &name_);
bool has_name() const;
const std::string &type() const;
void set_type(const std::string &type_);
bool has_type() const;
const std::vector<int> &mem_id() const;
void set_mem_id(const std::vector<int> &value);
uint32_t node_id() const;
void set_node_id(uint32_t node_id);
uint32_t op_id() const;
uint32_t padding() const;
void set_padding(uint32_t padding);
const std::vector<NodeInput> &node_input() const;
void add_node_input(const NodeInput &value);
const std::vector<int> &out_max_byte_size() const;
void add_out_max_byte_size(int value);
const std::vector<std::string> &input() const;
const std::string &input(int index) const;
std::string *add_input();
void add_input(const ::std::string &value);
void add_input(::std::string &&value);
void set_input(const std::vector<std::string> &value);
const std::vector<std::string> &output() const;
const std::string &output(int index) const;
std::string *add_output();
void add_output(const ::std::string &value);
void add_output(::std::string &&value);
void set_output(const std::vector<std::string> &value);
const std::vector<Argument> &arg() const;
Argument *add_arg();
const std::vector<OutputShape> &output_shape() const;
void add_output_shape(const OutputShape &value);
const std::vector<DataType> &output_type() const;
void set_output_type(const std::vector<DataType> &value);
private:
void set_has_name();
void set_has_type();
void set_has_mem_id();
private:
std::string name_;
std::string type_;
std::vector<std::string> input_;
std::vector<std::string> output_;
std::vector<Argument> arg_;
std::vector<OutputShape> output_shape_;
std::vector<DataType> output_type_;
std::vector<int> mem_id_;
// nnlib
uint32_t node_id_;
uint32_t op_id_;
uint32_t padding_;
std::vector<NodeInput> node_input_;
std::vector<int> out_max_byte_size_;
uint32_t has_bits_;
};
class MemoryBlock {
public:
MemoryBlock(int mem_id, uint32_t x, uint32_t y);
public:
int mem_id() const;
uint32_t x() const;
uint32_t y() const;
private:
int mem_id_;
uint32_t x_;
uint32_t y_;
};
class MemoryArena {
public:
const std::vector<MemoryBlock> &mem_block() const;
std::vector<MemoryBlock> &mutable_mem_block();
int mem_block_size() const;
private:
std::vector<MemoryBlock> mem_block_;
};
// for hexagon mace-nnlib
class InputInfo {
public:
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
const std::vector<int32_t> &dims() const;
private:
std::string name_;
int32_t node_id_;
int32_t max_byte_size_; // only support 32-bit len
DataType data_type_;
std::vector<int32_t> dims_;
};
class OutputInfo {
public:
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
void set_data_type(DataType data_type);
const std::vector<int32_t> &dims() const;
void set_dims(const std::vector<int32_t> &dims);
private:
std::string name_;
int32_t node_id_;
int32_t max_byte_size_; // only support 32-bit len
DataType data_type_;
std::vector<int32_t> dims_;
};
class NetDef {
public:
NetDef();
int op_size() const;
const OperatorDef &op(const int idx) const;
public:
const std::string &name() const;
bool has_name() const;
void set_name(const std::string &value);
const std::string &version() const;
bool has_version() const;
void set_version(const std::string &value);
const std::vector<OperatorDef> &op() const;
OperatorDef *add_op();
std::vector<OperatorDef> &mutable_op();
const std::vector<Argument> &arg() const;
Argument *add_arg();
std::vector<Argument> &mutable_arg();
const std::vector<ConstTensor> &tensors() const;
std::vector<ConstTensor> &mutable_tensors();
const MemoryArena &mem_arena() const;
bool has_mem_arena() const;
MemoryArena &mutable_mem_arena();
const std::vector<InputInfo> &input_info() const;
const std::vector<OutputInfo> &output_info() const;
std::vector<OutputInfo> &mutable_output_info();
private:
void set_has_name();
void set_has_version();
void set_has_mem_arena();
private:
std::string name_;
std::string version_;
std::vector<OperatorDef> op_;
std::vector<Argument> arg_;
std::vector<ConstTensor> tensors_;
// for mem optimization
MemoryArena mem_arena_;
// for hexagon mace-nnlib
std::vector<InputInfo> input_info_;
std::vector<OutputInfo> output_info_;
uint32_t has_bits_;
};
struct CallStats {
int64_t start_micros;
int64_t end_micros;
};
struct OperatorStats {
std::string operator_name;
std::string type;
CallStats stats;
};
struct RunMetadata {
std::vector<OperatorStats> op_stats;
};
} // namespace mace
#endif // MACE_PUBLIC_MACE_TYPES_H_
......@@ -72,9 +72,9 @@ class Shapes(object):
output_shape = np.zeros_like(input_shape)
output_shape[0] = input_shape[0]
output_shape[1] = int(round_func((input_shape[1] + paddings[0] - filter_shape[0]
                                  - (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1
output_shape[2] = int(round_func((input_shape[2] + paddings[1] - filter_shape[1]
                                  - (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1
output_shape[3] = filter_shape[2]
return output_shape
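The height/width arithmetic in this hunk — input extent plus total padding, minus the dilated kernel extent, divided by the stride, plus one — can be exercised in isolation. A minimal sketch of the same formula, assuming paddings are already summed over both sides as the converter does:
# illustrative sketch, not part of the commit
import math

def conv_output_dim(in_dim, pad, kernel, dilation, stride, round_func=math.floor):
    # effective kernel extent once dilation is applied
    k_eff = kernel + (kernel - 1) * (dilation - 1)
    return int(round_func((in_dim + pad - k_eff) / float(stride))) + 1

# 224x224 input, 3x3 kernel, total padding 2, stride 2, no dilation:
# floor((224 + 2 - 3) / 2) + 1 == 112
print(conv_output_dim(224, 2, 3, 1, 2))  # -> 112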
......@@ -247,12 +247,9 @@ class CaffeConverter(object):
arg.i = self.dt
return output_name
def add_input_transform(self, names, is_single):
def add_input_transform(self, names):
for name in names:
if is_single:
new_input_name = MACE_INPUT_NODE_NAME + ":0"
else:
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
op_def.name = name
op_def.type = 'BufferToImage'
......@@ -267,12 +264,9 @@ class CaffeConverter(object):
arg.name = 'T'
arg.i = self.dt
def add_output_transform(self, names, is_single):
def add_output_transform(self, names):
for name in names:
if is_single:
output_name = MACE_OUTPUT_NODE_NAME + ":0"
else:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
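Both transform hunks above drop the single-input special case: every boundary tensor now gets its own BufferToImage/ImageToBuffer endpoint, named by suffixing the tensor name. A small sketch of the naming rule; the MACE_*_NODE_NAME values here are assumed placeholders, not necessarily the converter's actual constants:
# illustrative sketch, not part of the commit
MACE_INPUT_NODE_NAME = "mace_input_node"    # assumed value
MACE_OUTPUT_NODE_NAME = "mace_output_node"  # assumed value

def transformed_names(names, prefix):
    # one transform endpoint per tensor, with the usual ":0" port suffix
    return [prefix + '_' + name + ":0" for name in names]

print(transformed_names(["data", "im_info"], MACE_INPUT_NODE_NAME))
# -> ['mace_input_node_data:0', 'mace_input_node_im_info:0']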
......@@ -333,8 +327,18 @@ class CaffeConverter(object):
return pad, stride, kernel
def convert_conv2d(self, op):
op_def = self.CommonConvert(op, 'Conv2D')
param = op.layer.convolution_param
is_depthwise = False
if param.HasField('group'):
if param.group == op.data[0].shape[0] and op.data[0].shape[1] == 1:
is_depthwise = True
else:
raise Exception("Mace do not support group convolution yet")
if is_depthwise:
op_def = self.CommonConvert(op, 'DepthwiseConv2d')
else:
op_def = self.CommonConvert(op, 'Conv2D')
# Add filter
weight_tensor_name = op.name + '_weight:0'
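The new group handling maps a Caffe group convolution to DepthwiseConv2d only when each group holds exactly one input channel; any other grouping is rejected. A standalone sketch of that decision, assuming the Caffe weight layout (out_channels, in_channels_per_group, kh, kw):
# illustrative sketch, not part of the commit
def classify_conv(group, weight_shape):
    # weight_shape: (out_channels, in_channels_per_group, kh, kw) -- Caffe layout
    if group is None or group == 1:
        return 'Conv2D'
    if group == weight_shape[0] and weight_shape[1] == 1:
        return 'DepthwiseConv2d'
    raise Exception("MACE does not support group convolution yet")

print(classify_conv(32, (32, 1, 3, 3)))     # -> 'DepthwiseConv2d'
print(classify_conv(None, (64, 32, 3, 3)))  # -> 'Conv2D'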
......@@ -342,7 +346,7 @@ class CaffeConverter(object):
self.add_tensor(weight_tensor_name, weight_data)
if self.device == 'gpu':
buffer_type = "CONV2D_FILTER"
buffer_type = "DW_CONV2D_FILTER" if is_depthwise else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
op_def.input.extend([output_name])
else:
......@@ -373,15 +377,16 @@ class CaffeConverter(object):
self.resolved_ops.add(op.name)
output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape_map[op.layer.bottom[0]],
weight_data.shape,
paddings, strides, dilations,
math.floor)
weight_data.shape,
paddings, strides, dilations,
math.floor)
op.output_shape_map[op.layer.top[0]] = output_shape
if len(self.ops_map[final_op.name].children) == 1 \
and self.ops_map[final_op.name].children[0].type in activation_name_map:
activation_op = self.ops_map[final_op.name].children[0]
op_def.type = "FusedConv2D"
if not is_depthwise:
op_def.type = "FusedConv2D"
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
fused_act_arg.s = activation_name_map[activation_op.type]
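The fusion above folds a trailing activation into the convolution, but only a plain convolution is renamed to FusedConv2D; a depthwise op keeps its type while still receiving the activation argument. A hedged sketch of that rule, with a one-entry stand-in for activation_name_map:
# illustrative sketch, not part of the commit
activation_name_map = {'ReLU': 'RELU'}  # assumed one-entry stand-in

def fuse_activation(op_type, act_type, args):
    # only a plain Conv2D is renamed; depthwise keeps its type
    if op_type == 'Conv2D':
        op_type = 'FusedConv2D'
    args['activation'] = activation_name_map[act_type]
    return op_type, args

print(fuse_activation('Conv2D', 'ReLU', {}))
# -> ('FusedConv2D', {'activation': 'RELU'})
print(fuse_activation('DepthwiseConv2d', 'ReLU', {}))
# -> ('DepthwiseConv2d', {'activation': 'RELU'})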
......@@ -412,7 +417,7 @@ class CaffeConverter(object):
width = output_shape[0] * ((output_shape[1] + 1) / 2) * ((output_shape[2] + 1) / 2)
return self.winograd and self.device == 'gpu' and \
filter_shape[0] == 3 and (filter_shape[0] == filter_shape[1]) and \
dilations[0] == 1 and (dilations[0] == dilations[1]) and\
dilations[0] == 1 and (dilations[0] == dilations[1]) and \
(strides[0] == 1) and (strides[0] == strides[1]) and \
(16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \
(16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \
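The Winograd gate is truncated by this hunk, but the visible conjuncts already pin down the shape constraints: GPU device, 3x3 kernel, unit stride and dilation, and 16x each channel count fitting inside an OpenCL image. A sketch of just those visible checks; OPENCL_IMAGE_MAX_SIZE is an assumed value here, and the channel order in filter_shape[2:] is not confirmed by the hunk:
# illustrative sketch, not part of the commit
OPENCL_IMAGE_MAX_SIZE = 16384  # assumed limit; the real value is device-dependent

def check_winograd(winograd, device, filter_shape, dilations, strides):
    # filter_shape: (kh, kw, channels, channels) -- exact in/out order not shown in the hunk
    return (winograd and device == 'gpu' and
            filter_shape[0] == 3 and filter_shape[0] == filter_shape[1] and
            dilations[0] == 1 and dilations[0] == dilations[1] and
            strides[0] == 1 and strides[0] == strides[1] and
            16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE and
            16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE)

print(check_winograd(True, 'gpu', (3, 3, 64, 64), (1, 1), (1, 1)))  # -> True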
......@@ -662,7 +667,7 @@ class CaffeConverter(object):
filter_shape = [kernels[0], kernels[1], input_shape[3], input_shape[3]]
output_shape = Shapes.conv_pool_shape(input_shape, filter_shape,
paddings, strides, [1, 1], math.ceil)
paddings, strides, [1, 1], math.ceil)
op.output_shape_map[op.layer.top[0]] = output_shape
op_def.output.extend([op.name + ':0'])
......@@ -764,7 +769,7 @@ class CaffeConverter(object):
input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
num_outputs = len(op.layer.top)
if (input_shape[3] % num_outputs) != 0 or \
(self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) :
(self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) :
raise Exception('MACE does not support slice with input shape '
+ str(input_shape) + ' and number of outputs ' + str(num_outputs))
output_shape = Shapes.slice_shape(input_shape, num_outputs)
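The guard above requires the channel dimension to split evenly across the top blobs, with the extra GPU requirement that each slice's channel count be a multiple of 4, presumably because the OpenCL image layout packs four channels per pixel. A minimal NHWC sketch of the same rule:
# illustrative sketch, not part of the commit
def slice_output_shape(input_shape, num_outputs, device='cpu'):
    # NHWC: the split happens along the channel axis
    if input_shape[3] % num_outputs != 0 or \
            (device == 'gpu' and (input_shape[3] // num_outputs) % 4 != 0):
        raise Exception('MACE does not support slice with input shape '
                        + str(input_shape) + ' and number of outputs '
                        + str(num_outputs))
    return input_shape[:3] + [input_shape[3] // num_outputs]

print(slice_output_shape([1, 56, 56, 64], 2, device='gpu'))  # -> [1, 56, 56, 32]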
......@@ -789,7 +794,6 @@ class CaffeConverter(object):
input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
output_shape = input_shape
shape_param = np.asarray(op.layer.reshape_param.shape.dim)[[0, 3, 2, 1]]
print shape_param
for i in range(len(shape_param)):
if shape_param[i] != 0:
output_shape[i] = shape_param[i]
......@@ -844,29 +848,20 @@ class CaffeConverter(object):
self.net_def.op.extend([op_def])
self.resolved_ops.add(op.name)
def replace_in_out_name(self, input_names, output_names, is_single):
def replace_in_out_name(self, input_names, output_names):
in_names = set([input_name + ":0" for input_name in input_names])
out_names = set([output_name + ":0" for output_name in output_names])
if is_single:
for op in self.net_def.op:
for i in range(len(op.input)):
if op.input[i] in in_names:
op.input[i] = MACE_INPUT_NODE_NAME + ':0'
for i in range(len(op.output)):
if op.output[i] in out_names:
op.output[i] = MACE_OUTPUT_NODE_NAME + ':0'
else:
for op in self.net_def.op:
for i in range(len(op.input)):
if op.input[i] in in_names:
op.input[i] = MACE_INPUT_NODE_NAME + '_' + op.input[i]
if op.input[i] in out_names:
op.input[i] = MACE_OUTPUT_NODE_NAME + '_' + op.input[i]
for i in range(len(op.output)):
if op.output[i] in in_names:
op.output[i] = MACE_INPUT_NODE_NAME + '_' + op.output[i]
if op.output[i] in out_names:
op.output[i] = MACE_OUTPUT_NODE_NAME + '_' + op.output[i]
for op in self.net_def.op:
for i in range(len(op.input)):
if op.input[i] in in_names:
op.input[i] = MACE_INPUT_NODE_NAME + '_' + op.input[i]
if op.input[i] in out_names:
op.input[i] = MACE_OUTPUT_NODE_NAME + '_' + op.input[i]
for i in range(len(op.output)):
if op.output[i] in in_names:
op.output[i] = MACE_INPUT_NODE_NAME + '_' + op.output[i]
if op.output[i] in out_names:
op.output[i] = MACE_OUTPUT_NODE_NAME + '_' + op.output[i]
def add_input_op_shape(self, input_nodes, input_shapes):
assert len(input_nodes) == len(input_shapes)
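With the single-node branch removed, replace_in_out_name rewrites every edge touching a boundary tensor by uniformly prefixing it. The same rewrite over a toy op list, with dicts standing in for the protobuf ops and the MACE_*_NODE_NAME values as assumed placeholders:
# illustrative sketch, not part of the commit
MACE_INPUT_NODE_NAME = "mace_input_node"    # assumed value
MACE_OUTPUT_NODE_NAME = "mace_output_node"  # assumed value

def rename_boundaries(ops, input_names, output_names):
    in_names = set(n + ":0" for n in input_names)
    out_names = set(n + ":0" for n in output_names)
    def rewrite(t):
        if t in in_names:
            return MACE_INPUT_NODE_NAME + '_' + t
        if t in out_names:
            return MACE_OUTPUT_NODE_NAME + '_' + t
        return t
    for op in ops:
        op['input'] = [rewrite(t) for t in op['input']]
        op['output'] = [rewrite(t) for t in op['output']]

ops = [{'input': ['data:0'], 'output': ['prob:0']}]
rename_boundaries(ops, ['data'], ['prob'])
print(ops)
# -> [{'input': ['mace_input_node_data:0'], 'output': ['mace_output_node_prob:0']}]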
......@@ -878,9 +873,8 @@ class CaffeConverter(object):
input_op.output_shape_map[input_op.name] = input_shapes[i]
def convert(self, input_nodes, input_shapes, output_nodes):
is_single = len(input_nodes) == 1 and len(output_nodes) == 1
if self.device == 'gpu':
self.add_input_transform(input_nodes, is_single)
self.add_input_transform(input_nodes)
assert self.ops[0].type == 'Input'
self.add_input_op_shape(input_nodes, input_shapes)
......@@ -925,10 +919,10 @@ class CaffeConverter(object):
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
if self.device == 'gpu':
self.add_output_transform(output_nodes, is_single)
self.add_output_transform(output_nodes)
if self.device == 'cpu':
self.replace_in_out_name(input_nodes, output_nodes, is_single)
self.replace_in_out_name(input_nodes, output_nodes)
for op in self.ops:
if op.name not in self.resolved_ops:
......@@ -967,3 +961,4 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
print "Memory optimization done."
return net_def
//
// Copyright (c) 2017 XiaoMi All rights reserved.
// Generated by the mace converter. DO NOT EDIT!
//
// Generated by the mace converter. DO NOT EDIT!
#ifndef MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_
#define MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_
#include <string>
......@@ -10,13 +13,16 @@
namespace mace {
namespace {{tag}} {
extern const unsigned char *LoadModelData(const char *model_data_file);
const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
const std::string ModelChecksum();
} // namespace {{ tag }}
} // namespace mace
#endif // MACE_CODEGEN_MODELS_{{tag|upper}}_{{tag|upper}}_H_
......@@ -118,12 +118,9 @@ class TFConverter(object):
arg.i = self.dt
return output_name
def add_input_transform(self, names, is_single):
def add_input_transform(self, names):
for name in names:
if is_single:
new_input_name = MACE_INPUT_NODE_NAME + ":0"
else:
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
op_def.name = name
op_def.type = 'BufferToImage'
......@@ -138,12 +135,9 @@ class TFConverter(object):
arg.name = 'T'
arg.i = self.dt
def add_output_transform(self, names, is_single):
def add_output_transform(self, names):
for name in names:
if is_single:
output_name = MACE_OUTPUT_NODE_NAME + ":0"
else:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
op_def.name = output_name[:-2]
op_def.type = 'ImageToBuffer'
......@@ -362,7 +356,8 @@ class TFConverter(object):
if len(self.tf_graph.get(final_op.name, [])) == 1 \
and self.tf_graph[final_op.name][0].type in activation_name_map:
activation_op = self.tf_graph[final_op.name][0]
op_def.type = "FusedConv2D"
if op_def.type == "Conv2D":
op_def.type = "FusedConv2D"
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
fused_act_arg.s = activation_name_map[activation_op.type]
......@@ -805,26 +800,18 @@ class TFConverter(object):
self.add_output_shape(op.outputs, op_def)
self.resolved_ops[op.name] = 1
def replace_in_out_name(self, input_names, output_names, is_single):
def replace_in_out_name(self, input_names, output_names):
in_names = set([input_name + ":0" for input_name in input_names])
out_names = set([output_name + ":0" for output_name in output_names])
if is_single:
for op in self.net_def.op:
if len(op.input) > 0 and op.input[0] in in_names:
op.input[0] = MACE_INPUT_NODE_NAME + ':0'
if len(op.output) > 0 and op.output[0] in out_names:
op.output[0] = MACE_OUTPUT_NODE_NAME + ':0'
else:
for op in self.net_def.op:
if len(op.input) > 0 and op.input[0] in in_names:
op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0]
if len(op.output) > 0 and op.output[0] in out_names:
op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0]
for op in self.net_def.op:
if len(op.input) > 0 and op.input[0] in in_names:
op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0]
if len(op.output) > 0 and op.output[0] in out_names:
op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0]
def convert(self, input_nodes, output_nodes):
is_single = len(input_nodes) == 1 and len(output_nodes) == 1
if self.device == 'gpu':
self.add_input_transform(input_nodes, is_single)
self.add_input_transform(input_nodes)
for op in self.tf_ops:
if self.resolved_ops[op.name] == 1:
......@@ -892,10 +879,10 @@ class TFConverter(object):
raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
if self.device == 'gpu':
self.add_output_transform(output_nodes, is_single)
self.add_output_transform(output_nodes)
if self.device == 'cpu':
self.replace_in_out_name(input_nodes, output_nodes, is_single)
self.replace_in_out_name(input_nodes, output_nodes)
for key in self.resolved_ops:
if self.resolved_ops[key] != 1:
......
......@@ -25,6 +25,6 @@ cat <<EOF > ${OUTPUT_FILENAME}
// This is a generated file, DO NOT EDIT
namespace mace {
const char *MaceGitVersion() { return "${GIT_VERSION}"; }
const char *MaceVersion() { return "${GIT_VERSION}"; }
} // namespace mace
EOF
......@@ -12,6 +12,7 @@
#include <utility>
#include "mace/public/mace.h"
#include "mace/public/mace_types.h"
#include "mace/utils/env_time.h"
#include "mace/utils/string_util.h"
......
......@@ -76,26 +76,28 @@ def generate_random_input(target_soc, model_output_dir,
target_soc, model_output_dir, int(generate_data_or_not))
run_command(command)
input_name_list = []
input_file_list = []
if isinstance(input_names, list):
input_name_list.extend(input_names)
else:
input_name_list.append(input_names)
if isinstance(input_files, list):
input_file_list.extend(input_files)
else:
input_file_list.append(input_files)
assert len(input_file_list) == len(input_name_list)
for i in range(len(input_file_list)):
if input_file_list[i] is not None:
dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
if input_file_list[i].startswith("http://") or \
input_file_list[i].startswith("https://"):
urllib.urlretrieve(input_file_list[i], dst_input_file)
else:
print 'Copy input data:', dst_input_file
shutil.copy(input_file_list[i], dst_input_file)
if len(input_file_list) != 0:
input_name_list = []
if isinstance(input_names, list):
input_name_list.extend(input_names)
else:
input_name_list.append(input_names)
if len(input_file_list) != len(input_name_list):
raise Exception('If input_files is set, the number of input files must match the number of input names.')
for i in range(len(input_file_list)):
if input_file_list[i] is not None:
dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
if input_file_list[i].startswith("http://") or \
input_file_list[i].startswith("https://"):
urllib.urlretrieve(input_file_list[i], dst_input_file)
else:
print 'Copy input data:', dst_input_file
shutil.copy(input_file_list[i], dst_input_file)
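The rewritten staging logic only enforces the name/file match when files were actually supplied, and raises instead of asserting. A condensed sketch of the same download-or-copy dispatch; input_file_name() is replaced here by a hypothetical naming scheme:
# illustrative sketch, not part of the commit
import shutil
import urllib  # Python 2; use urllib.request.urlretrieve on Python 3

def stage_input_files(input_files, input_names, model_output_dir):
    if not input_files:
        return
    if len(input_files) != len(input_names):
        raise Exception('If input_files is set, it must match input_names one to one.')
    for name, src in zip(input_names, input_files):
        if src is None:
            continue
        dst = model_output_dir + '/model_input_' + name  # hypothetical naming scheme
        if src.startswith(('http://', 'https://')):
            urllib.urlretrieve(src, dst)
        else:
            shutil.copy(src, dst)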
def generate_model_code():
command = "bash tools/generate_model_code.sh"
......