提交 9a91a63d 编写于 作者: Y yejianwu

merge with origin

......@@ -7,21 +7,32 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_profiling_enabled")
load("//mace:mace.bzl", "if_android", "if_android_armv7", "if_android_arm64",
"if_profiling_enabled")
cc_library(
name = "core",
srcs = glob([
"*.cc",
"runtime/opencl/*.cc",
], exclude=["runtime/opencl/opencl_binary_linkage.cc",
"runtime/hexagon/*.cc",
],
exclude = [
"runtime/opencl/opencl_binary_linkage.cc",
"runtime/opencl/opencl_source_linkage.cc",
"*_test.cc",
"runtime/hexagon/hexagon_controller_dummy.cc",
]) + if_android_armv7([
"runtime/hexagon/libhexagon_controller.so",
]) + if_android_arm64([
"runtime/hexagon/hexagon_controller_dummy.cc",
]),
hdrs = glob([
"*.h",
"public/*.h",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.h",
"runtime/hexagon/*.h",
]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1", "-Werror=return-type"] +
if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]),
......
......@@ -24,5 +24,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator());
} // namespace mace
......@@ -5,9 +5,7 @@
#include "mace/core/public/mace.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
namespace mace {
......@@ -16,26 +14,26 @@ ConstTensor::ConstTensor(const std::string &name,
const std::vector<int64_t> &dims,
const DataType data_type,
uint32_t node_id) :
name_(name),
data_(data),
data_size_(std::accumulate(dims.begin(), dims.end(), 1,
std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(data_type),
node_id_(node_id) {}
name_(name),
data_(data),
data_size_(std::accumulate(dims.begin(), dims.end(), 1,
std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(data_type),
node_id_(node_id) {}
ConstTensor::ConstTensor(const std::string &name,
unsigned char *data,
const std::vector<int64_t> &dims,
const int data_type,
uint32_t node_id) :
name_(name),
data_(data),
data_size_(std::accumulate(dims.begin(), dims.end(), 1,
std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(static_cast<DataType>(data_type)),
node_id_(node_id) {}
name_(name),
data_(data),
data_size_(std::accumulate(dims.begin(), dims.end(), 1,
std::multiplies<int64_t>())),
dims_(dims.begin(), dims.end()),
data_type_(static_cast<DataType>(data_type)),
node_id_(node_id) {}
const std::string &ConstTensor::name() const {
return name_;
......@@ -152,6 +150,8 @@ void Argument::set_strings(const std::vector<std::string> &value) {
}
// Node Input
NodeInput::NodeInput(int node_id, int output_port)
: node_id_(node_id), output_port_(output_port) {}
void NodeInput::CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
......@@ -159,14 +159,20 @@ void NodeInput::CopyFrom(const NodeInput &from) {
// Id of the producer node that feeds this input.
int NodeInput::node_id() const {
return node_id_;
}
void NodeInput::set_node_id(int node_id) {
node_id_ = node_id;
}
// Output-port index on the producer node this input reads from.
int NodeInput::output_port() const {
return output_port_;
}
void NodeInput::set_output_port(int output_port) {
output_port_ = output_port;
}
// OutputShape
OutputShape::OutputShape() {}
OutputShape::OutputShape(const std::vector<int64_t> &dims):
dims_(dims.begin(), dims.end()) {}
OutputShape::OutputShape(const std::vector<int64_t> &dims) :
dims_(dims.begin(), dims.end()) {}
void OutputShape::CopyFrom(const OutputShape &from) {
auto from_dims = from.dims();
dims_.resize(from_dims.size());
......@@ -214,7 +220,9 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
}
auto from_out_max_byte_size = from.out_max_byte_size();
out_max_byte_size_.resize(from_out_max_byte_size.size());
std::copy(from_out_max_byte_size.begin(), from_out_max_byte_size.end(), out_max_byte_size_.begin());
std::copy(from_out_max_byte_size.begin(),
from_out_max_byte_size.end(),
out_max_byte_size_.begin());
has_bits_ = from.has_bits_;
......@@ -262,18 +270,30 @@ void OperatorDef::set_has_mem_id() {
// Graph-level node id of this operator (used by the DSP runtime to wire
// node_input references — see HexagonControlWrapper::SetupGraph).
uint32_t OperatorDef::node_id() const {
return node_id_;
}
void OperatorDef::set_node_id(uint32_t node_id) {
node_id_ = node_id;
}
uint32_t OperatorDef::op_id() const {
return op_id_;
}
// Padding encoded as an integer; on the DSP path it is cast to
// hexagon_nn_padding_type before use.
uint32_t OperatorDef::padding() const {
return padding_;
}
void OperatorDef::set_padding(uint32_t padding) {
padding_ = padding;
}
// Structured inputs (producer node id + output port), used where tensors
// are identified by id rather than by name.
const std::vector<NodeInput> &OperatorDef::node_input() const {
return node_input_;
}
void OperatorDef::add_node_input(const NodeInput &value) {
node_input_.push_back(value);
}
// Per-output maximum byte sizes, consumed when sizing hexagon_nn outputs.
const std::vector<int> &OperatorDef::out_max_byte_size() const {
return out_max_byte_size_;
}
void OperatorDef::add_out_max_byte_size(int value) {
out_max_byte_size_.push_back(value);
}
const std::vector<std::string> &OperatorDef::input() const {
return input_;
}
......@@ -339,7 +359,7 @@ void OperatorDef::set_output_type(const std::vector<DataType> &value) {
// MemoryBlock
MemoryBlock::MemoryBlock(int mem_id, uint32_t x, uint32_t y) :
mem_id_(mem_id), x_(x), y_(y) {}
mem_id_(mem_id), x_(x), y_(y) {}
int MemoryBlock::mem_id() const {
return mem_id_;
......@@ -392,9 +412,15 @@ int32_t OutputInfo::max_byte_size() const {
// Data type of this output tensor.
DataType OutputInfo::data_type() const {
return data_type_;
}
void OutputInfo::set_data_type(DataType data_type) {
data_type_ = data_type;
}
// Dimensions of this output tensor.
const std::vector<int32_t> &OutputInfo::dims() const {
return dims_;
}
void OutputInfo::set_dims(const std::vector<int32_t> &dims) {
dims_ = dims;
}
// NetDef
// Default-constructs an empty net. has_bits_ is presumably a presence
// bitmask for optional fields (cf. set_has_name) — confirm in the header.
NetDef::NetDef() : has_bits_(0) {}
......@@ -470,6 +496,9 @@ const std::vector<InputInfo> &NetDef::input_info() const {
// Read-only view of the net's output metadata.
const std::vector<OutputInfo> &NetDef::output_info() const {
return output_info_;
}
// Mutable access to the same output metadata vector.
std::vector<OutputInfo> &NetDef::mutable_output_info() {
return output_info_;
}
int NetDef::op_size() const {
return op_.size();
......@@ -481,40 +510,66 @@ const OperatorDef &NetDef::op(const int idx) const {
}
// Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type):
op_registry_(new OperatorRegistry()), device_type_(device_type),
ws_(new Workspace()), net_(nullptr) {
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
op_registry_(new OperatorRegistry()), device_type_(device_type),
ws_(new Workspace()), net_(nullptr), hexagon_controller_(nullptr) {
ws_->LoadModelTensor(*net_def, device_type);
if (device_type == HEXAGON) {
hexagon_controller_.reset(new HexagonControlWrapper());
hexagon_controller_->Init();
hexagon_controller_->SetDebugLevel(0);
hexagon_controller_->Config();
hexagon_controller_->SetupGraph(*net_def);
hexagon_controller_->PrintGraph();
} else {
ws_->LoadModelTensor(*net_def, device_type);
// Init model
auto net = CreateNet(op_registry_, *net_def, ws_.get(),
device_type, NetMode::INIT);
if(!net->Run()) {
LOG(FATAL) << "Net init run failed";
// Init model
auto net = CreateNet(op_registry_, *net_def, ws_.get(),
device_type, NetMode::INIT);
if (!net->Run()) {
LOG(FATAL) << "Net init run failed";
}
ws_->CreateTensor("mace_input_node:0",
GetDeviceAllocator(device_type_),
DT_FLOAT);
net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
}
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
}
MaceEngine::~MaceEngine() = default;
MaceEngine::~MaceEngine() {
if (device_type_ == HEXAGON) {
hexagon_controller_->GetPerfInfo();
hexagon_controller_->PrintLog();
hexagon_controller_->TeardownGraph();
hexagon_controller_->Finalize();
}
};
bool MaceEngine::Run(const float *input,
const std::vector<index_t> &input_shape,
float *output) {
MACE_CHECK(output != nullptr, "output ptr cannot be NULL");
Tensor *input_tensor =
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
ws_->CreateTensor("mace_input_node:0",
GetDeviceAllocator(device_type_),
DT_FLOAT);
Tensor *output_tensor =
ws_->CreateTensor("mace_output_node:0",
GetDeviceAllocator(device_type_),
DT_FLOAT);
input_tensor->Resize(input_shape);
{
Tensor::MappingGuard input_guard(input_tensor);
float *input_data = input_tensor->mutable_data<float>();
memcpy(input_data, input, input_tensor->size() * sizeof(float));
}
if(!net_->Run()) {
LOG(FATAL) << "Net run failed";
if (device_type_ == HEXAGON) {
hexagon_controller_->ExecuteGraphPreQuantize(*input_tensor, output_tensor);
} else {
if (!net_->Run()) {
LOG(FATAL) << "Net run failed";
}
}
// save output
const Tensor *output_tensor = ws_->GetTensor("mace_output_node:0");
if (output_tensor != nullptr) {
Tensor::MappingGuard output_guard(output_tensor);
auto shape = output_tensor->shape();
......
......@@ -19,7 +19,8 @@ enum NetMode {
enum DeviceType {
CPU = 0,
NEON = 1,
OPENCL = 2
OPENCL = 2,
HEXAGON = 3
};
enum DataType {
......@@ -70,33 +71,33 @@ class ConstTensor {
class Argument {
public:
Argument();
void CopyFrom(const Argument &from) ;
void CopyFrom(const Argument &from);
public:
const std::string &name() const;
void set_name(const std::string& value);
void set_name(const std::string &value);
bool has_f() const;
float f() const ;
void set_f(float value) ;
bool has_i() const ;
int64_t i() const ;
float f() const;
void set_f(float value);
bool has_i() const;
int64_t i() const;
void set_i(int64_t value);
bool has_s() const ;
std::string s() const ;
void set_s(const std::string& value) ;
const std::vector<float> &floats() const ;
void add_floats(float value) ;
bool has_s() const;
std::string s() const;
void set_s(const std::string &value);
const std::vector<float> &floats() const;
void add_floats(float value);
void set_floats(const std::vector<float> &value);
const std::vector<int64_t> &ints() const ;
void add_ints(int64_t value) ;
const std::vector<int64_t> &ints() const;
void add_ints(int64_t value);
void set_ints(const std::vector<int64_t> &value);
const std::vector<std::string> &strings() const ;
void add_strings(const ::std::string& value) ;
const std::vector<std::string> &strings() const;
void add_strings(const ::std::string &value);
void set_strings(const std::vector<std::string> &value);
private:
void set_has_f() ;
void set_has_i() ;
void set_has_s() ;
void set_has_f();
void set_has_i();
void set_has_s();
private:
std::string name_;
......@@ -104,17 +105,21 @@ class Argument {
int64_t i_;
std::string s_;
std::vector<float> floats_;
std::vector<int64_t > ints_;
std::vector<int64_t> ints_;
std::vector<std::string> strings_;
uint32_t has_bits_;
};
class NodeInput {
public:
NodeInput() {}
NodeInput(int node_id, int output_port);
void CopyFrom(const NodeInput &from);
public:
int node_id() const;
void set_node_id(int node_id);
int output_port() const;
void set_output_port(int output_port);
private:
int node_id_;
int output_port_;
......@@ -146,24 +151,28 @@ class OperatorDef {
void set_mem_id(const int mem_id);
bool has_mem_id() const;
uint32_t node_id() const;
void set_node_id(uint32_t node_id);
uint32_t op_id() const;
uint32_t padding() const;
void set_padding(uint32_t padding);
const std::vector<NodeInput> &node_input() const;
void add_node_input(const NodeInput &value);
const std::vector<int> &out_max_byte_size() const;
void add_out_max_byte_size(int value);
const std::vector<std::string> &input() const;
const std::string& input(int index) const;
std::string* add_input();
void add_input(const ::std::string& value);
void add_input(::std::string&& value);
const std::string &input(int index) const;
std::string *add_input();
void add_input(const ::std::string &value);
void add_input(::std::string &&value);
void set_input(const std::vector<std::string> &value);
const std::vector<std::string> &output() const;
const std::string& output(int index) const;
std::string* add_output();
void add_output(const ::std::string& value);
void add_output(::std::string&& value);
const std::string &output(int index) const;
std::string *add_output();
void add_output(const ::std::string &value);
void add_output(::std::string &&value);
void set_output(const std::vector<std::string> &value);
const std::vector<Argument> &arg() const;
Argument* add_arg();
Argument *add_arg();
const std::vector<OutputShape> &output_shape() const;
void add_output_shape(const OutputShape &value);
const std::vector<DataType> &output_type() const;
......@@ -241,7 +250,9 @@ class OutputInfo {
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
void set_data_type(DataType data_type);
const std::vector<int32_t> &dims() const;
void set_dims(const std::vector<int32_t> &dims);
private:
std::string name_;
int32_t node_id_;
......@@ -259,13 +270,13 @@ class NetDef {
public:
const std::string &name() const;
bool has_name() const;
void set_name(const std::string& value);
void set_name(const std::string &value);
const std::string &version() const;
bool has_version() const;
void set_version(const std::string& value);
void set_version(const std::string &value);
const std::vector<OperatorDef> &op() const;
OperatorDef* add_op();
OperatorDef *add_op();
std::vector<OperatorDef> &mutable_op();
const std::vector<Argument> &arg() const;
Argument *add_arg();
......@@ -277,6 +288,7 @@ class NetDef {
MemoryArena &mutable_mem_arena();
const std::vector<InputInfo> &input_info() const;
const std::vector<OutputInfo> &output_info() const;
std::vector<OutputInfo> &mutable_output_info();
private:
void set_has_name();
......@@ -303,6 +315,7 @@ class NetDef {
class Workspace;
class NetBase;
class OperatorRegistry;
class HexagonControlWrapper;
class MaceEngine {
public:
......@@ -312,14 +325,15 @@ class MaceEngine {
bool Run(const float *input,
const std::vector<int64_t> &input_shape,
float *output);
MaceEngine(const MaceEngine&) = delete;
MaceEngine &operator=(const MaceEngine&) = delete;
MaceEngine(const MaceEngine &) = delete;
MaceEngine &operator=(const MaceEngine &) = delete;
private:
std::shared_ptr<OperatorRegistry> op_registry_;
DeviceType device_type_;
std::unique_ptr<Workspace> ws_;
std::unique_ptr<NetBase> net_;
std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
};
} // namespace mace
......
......@@ -2,8 +2,9 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include <fstream>
#include <unordered_map>
namespace mace {
......@@ -45,7 +46,7 @@ bool HexagonControlWrapper::Finalize() {
bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
LOG(INFO) << "Hexagon setup graph";
// const node
for (const TensorProto& tensor_proto: net_def.tensors()) {
for (const ConstTensor& tensor_proto: net_def.tensors()) {
vector<int> tensor_shape(tensor_proto.dims().begin(),
tensor_proto.dims().end());
while (tensor_shape.size() < 4) {
......@@ -53,7 +54,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
}
if (tensor_proto.data_type() == DataType::DT_INT32
&& tensor_proto.int32_data_size() == 0) {
&& tensor_proto.data_size() == 0) {
hexagon_nn_append_const_node(nn_id_, node_id(tensor_proto.node_id()),
tensor_shape[0], tensor_shape[1],
tensor_shape[2], tensor_shape[3],
......@@ -81,14 +82,14 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
unsigned int op_id;
MACE_CHECK(hexagon_nn_op_name_to_id(op.type().data(), &op_id) == 0,
"invalid op: ", op.name(), ", type: ", op.type());
vector<hexagon_nn_input> inputs(op.node_input_size());
for (size_t i = 0; i < op.node_input_size(); ++i) {
inputs[i].src_id = node_id(op.node_input(i).node_id());
inputs[i].output_idx = op.node_input(i).output_port();
vector<hexagon_nn_input> inputs(op.node_input().size());
for (size_t i = 0; i < op.node_input().size(); ++i) {
inputs[i].src_id = node_id(op.node_input()[i].node_id());
inputs[i].output_idx = op.node_input()[i].output_port();
}
vector<hexagon_nn_output> outputs(op.out_max_byte_size_size());
for (size_t i = 0; i < op.out_max_byte_size_size(); ++i) {
outputs[i].max_size = op.out_max_byte_size(i);
vector<hexagon_nn_output> outputs(op.out_max_byte_size().size());
for (size_t i = 0; i < op.out_max_byte_size().size(); ++i) {
outputs[i].max_size = op.out_max_byte_size()[i];
}
hexagon_nn_padding_type padding_type = static_cast<hexagon_nn_padding_type>(
......@@ -136,20 +137,18 @@ bool HexagonControlWrapper::SetupGraph(const NetDef& net_def) {
output_shapes_.push_back(output_shape);
output_data_types_.push_back(output_info.data_type());
num_outputs_ += 1;
VLOG(0) << "OutputInfo: "
<< "\n\t shape: " << output_shape[0] << " " << output_shape[1]
<< " " << output_shape[2] << " " << output_shape[3]
<< "\n\t type: " << output_info.data_type();
}
VLOG(0) << "Magic";
bool res = hexagon_nn_prepare(nn_id_) == 0;
return res;
}
bool HexagonControlWrapper::SetupGraph(const std::string& model_file) {
std::ifstream file_stream(model_file, std::ios::in | std::ios::binary);
NetDef net_def;
net_def.ParseFromIstream(&file_stream);
file_stream.close();
return SetupGraph(net_def);
}
bool HexagonControlWrapper::TeardownGraph() {
LOG(INFO) << "Hexagon teardown graph";
return hexagon_nn_teardown(nn_id_) == 0;
......
......@@ -5,12 +5,11 @@
#ifndef MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
#define MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
#include "mace/dsp/hexagon/hexagon_controller.h"
#include "mace/dsp/hexagon_nn_ops.h"
#include "mace/dsp/util/quantize.h"
#include "mace/core/runtime/hexagon/hexagon_controller.h"
#include "mace/core/runtime/hexagon/quantize.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/public/mace.h"
#include "mace/core/serializer.h"
namespace mace {
......@@ -23,7 +22,6 @@ class HexagonControlWrapper {
bool Init();
bool Finalize();
bool SetupGraph(const NetDef& net_def);
bool SetupGraph(const std::string &model_file);
bool ExecuteGraph(const Tensor &input_tensor, Tensor *output_tensor);
bool ExecuteGraphNew(const vector<Tensor>& input_tensors,
vector<Tensor> *output_tensors);
......
#ifndef MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
#define MACE_DSP_HEXAGON_DSP_CONTROLLER_H_
#include "hexagon_nn.h"
#include "mace/core/runtime/hexagon/hexagon_nn.h"
#ifdef __cplusplus
extern "C" {
......
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/core/runtime/hexagon/hexagon_controller.h"
#include "mace/core/runtime/hexagon/hexagon_nn.h"
// No-op stand-ins for the Hexagon controller init/deinit entry points,
// linked on targets that have no DSP. Both always report success (0).
int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
                                                    int bus_usage) {
  (void)enable_dcvs;  // intentionally unused in the dummy build
  (void)bus_usage;
  return 0;
}

int hexagon_controller_DeInitHexagon() { return 0; }
// Stub implementations of the generated hexagon_nn QAIC interface.
// Every entry point performs no work and returns 0 (success) so that
// non-DSP builds can link without the real libhexagon_controller.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)(hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int operation, hexagon_nn_padding_type padding, const hexagon_nn_input* inputs, int inputsLen, const hexagon_nn_output* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(hexagon_nn_nn_id id, unsigned int node_id, unsigned int batches, unsigned int height, unsigned int width, unsigned int depth, const unsigned char* data, int dataLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(hexagon_nn_nn_id id, unsigned int batches_in, unsigned int height_in, unsigned int width_in, unsigned int depth_in, const unsigned char* data_in, int data_inLen, unsigned int* batches_out, unsigned int* height_out, unsigned int* width_out, unsigned int* depth_out, unsigned char* data_out, int data_outLen, unsigned int* data_len_out) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(unsigned int level) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(hexagon_nn_nn_id id, hexagon_nn_perfinfo* info_out, int info_outLen, unsigned int* n_items) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(hexagon_nn_nn_id id, unsigned int* cycles_lo, unsigned int* cycles_hi) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int* ver) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)(const char* name, unsigned int* node_id) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)(unsigned int node_id, char* name, int nameLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)(int* ver) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)(const unsigned char* buf, int bufLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(hexagon_nn_nn_id id, const hexagon_nn_tensordef* inputs, int inputsLen, hexagon_nn_tensordef* outputs, int outputsLen) __QAIC_HEADER_ATTRIBUTE { return 0; }
......@@ -2,7 +2,7 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/util/quantize.h"
#include "mace/core/runtime/hexagon/quantize.h"
namespace mace {
......
......@@ -40,8 +40,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const ConstTensor &proto,
proto.data_size());
break;
case DT_UINT8:
tensor->CopyWithCast<int32_t, uint8_t>(
reinterpret_cast<const int32_t *>(proto.data()), proto.data_size());
tensor->Copy<uint8_t>(reinterpret_cast<const uint8_t *>(proto.data()),
proto.data_size());
break;
case DT_INT16:
tensor->CopyWithCast<int32_t, uint16_t>(
......
# Description:
# Mace dsp.
#
# Only supports armeabi-v7a for now
# bazel build -c opt mace/dsp:dsp --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=armeabi-v7a --verbose_failures
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
cc_library(
name = "dsp",
srcs = glob([
"*.cc",
"hexagon/libhexagon_controller.so",
], exclude = [
"*_test.cc",
]),
hdrs = glob([
"*.h",
"hexagon/*.h",
]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
deps = [
"//mace/proto:cc_proto",
"//mace/core:core",
"//mace/dsp/util:util",
],
)
cc_test(
name = "dsp_test",
testonly = 1,
srcs = glob(["*_test.cc"]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android([
"-ldl",
"-lm",
]),
linkstatic = 1,
deps = [
"@gtest//:gtest_main",
":dsp",
],
)
cc_test(
name = "dsp_op_test",
testonly = 1,
srcs = glob(["test/*_test.cc"]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android([
"-ldl",
"-lm",
]),
linkstatic = 1,
deps = [
"@gtest//:gtest_main",
":dsp",
"//mace/kernels:kernels",
],
)
cc_binary(
name = "mace_dsp_run",
srcs = [
"tool/mace_dsp_run.cc",
],
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android([
"-ldl",
"-lm",
]),
linkstatic = 1,
deps = [
":dsp",
"//mace/kernels:kernels",
"//mace/utils:command_line_flags",
],
)
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "mace/utils/logging.h"
#include "mace/utils/env_time.h"
#include "gtest/gtest.h"
using namespace mace;
// On-device smoke/perf test: runs a quantized ICNet graph on the DSP with
// float input, times 10 executions, and dumps the raw output values.
TEST(HexagonControlerWrapper, InputFloat) {
testing::internal::LogToStderr();
HexagonControlWrapper wrapper;
VLOG(0) << "version: " << wrapper.GetVersion();
wrapper.Init();
wrapper.SetDebugLevel(0);
wrapper.Config();
// Model file is expected in the process working directory on the device.
VLOG(0) << wrapper.SetupGraph("quantized_icnet_dsp.pb");
wrapper.PrintGraph();
Tensor input_tensor;
Tensor output_tensor;
input_tensor.Resize({1, 480, 480, 3});
float *input_data = input_tensor.mutable_data<float>();
// Deterministic ramp pattern in [0, 255] as fake image data.
for (int i = 0; i < input_tensor.size(); ++i) {
input_data[i] = i % 256;
}
wrapper.ResetPerfInfo();
int64_t start_micros = utils::NowMicros();
int round = 10;
for (int i = 0; i < round; ++i) {
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
}
int64_t end_micros = utils::NowMicros();
// NOTE(review): NowMicros() yields microseconds but the label says "ms";
// the printed value is off by 1000x or mislabeled — confirm and fix.
VLOG(0) << "avg duration: " << (end_micros - start_micros) / (double)round
<< " ms";
wrapper.GetPerfInfo();
wrapper.PrintLog();
const float *output_data = output_tensor.data<float>();
VLOG(0) << output_tensor.size() << output_tensor.dtype();
for (int i = 0; i < output_tensor.size(); ++i) {
std::cout << output_data[i] << " ";
}
std::cout << std::endl;
VLOG(0) << wrapper.TeardownGraph();
wrapper.Finalize();
}
// Same flow as InputFloat but through ExecuteGraphPreQuantize (uint8 model),
// timing 10 rounds with gettimeofday (milliseconds).
TEST(HexagonControlerWrapper, PreQuantize) {
testing::internal::LogToStderr();
HexagonControlWrapper wrapper;
VLOG(0) << "version: " << wrapper.GetVersion();
wrapper.Init();
wrapper.SetDebugLevel(0);
wrapper.Config();
// Pre-quantized (uint8) model expected in the working directory.
VLOG(0) << wrapper.SetupGraph("quantized_icnet_dsp_u8.pb");
wrapper.PrintGraph();
Tensor input_tensor;
Tensor output_tensor;
input_tensor.Resize({1, 480, 480, 3});
float *input_data = input_tensor.mutable_data<float>();
// Deterministic ramp pattern in [0, 255] as fake image data.
for (int i = 0; i < input_tensor.size(); ++i) {
input_data[i] = i % 256;
}
wrapper.ResetPerfInfo();
timeval tv1, tv2;
gettimeofday(&tv1, NULL);
int round = 10;
for (int i = 0; i < round; ++i) {
VLOG(0) << wrapper.ExecuteGraphPreQuantize(input_tensor, &output_tensor);
}
gettimeofday(&tv2, NULL);
// Average wall time per round, in milliseconds.
VLOG(0) << "avg duration: "
<< ((tv2.tv_sec - tv1.tv_sec) * 1000 +
(tv2.tv_usec - tv1.tv_usec) / 1000) /
round;
wrapper.GetPerfInfo();
wrapper.PrintLog();
const float *output_data = output_tensor.data<float>();
for (int i = 0; i < output_tensor.size(); ++i) {
std::cout << output_data[i] << " ";
}
std::cout << std::endl;
VLOG(0) << wrapper.TeardownGraph();
wrapper.Finalize();
}
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_HEXAGON_NN_OPS_H_
#define MACE_HEXAGON_NN_OPS_H_
#include "mace/utils/logging.h"
#include <unordered_map>
namespace mace {
#define OP_INVALID -1
// The following macros are deprecated; keep them only if caching op
// metadata in the stub turns out to be necessary for performance or
// other reasons.
typedef enum op_type_enum {
#define DEF_OP(NAME, ...) OP_##NAME,
#include "mace/dsp/ops.h"
NN_OPS_MAX
#undef DEF_OP
} op_type;
#define DEF_OP(NAME,...) #NAME,
static const char *hexagon_nn_op_names[NN_OPS_MAX] = {
#include "mace/dsp/ops.h"
};
#undef DEF_OP
// Maps textual op types (e.g. "QuantizedAdd_8p8to8") to hexagon_nn op ids
// generated from the DEF_OP list. Call Init() once before GetOpId().
class OpMap {
 public:
  // Populates the lookup table from mace/dsp/ops.h.
  void Init() {
#define DEF_OP(NAME) \
    op_map_[#NAME] = OP_##NAME;
#include "mace/dsp/ops.h"
#undef DEF_OP
  }

  // Returns the op id for op_type, or OP_INVALID (with an error log) when
  // the DSP does not support the op.
  // Fixes vs. original: single map lookup instead of find + operator[],
  // pass-by-const-ref instead of by value, and "unsupoorted" typo in the
  // error message corrected.
  int GetOpId(const std::string &op_type) {
    auto iter = op_map_.find(op_type);
    if (iter == op_map_.end()) {
      LOG(ERROR) << "DSP unsupported op type: " << op_type;
      return OP_INVALID;
    }
    return iter->second;
  }

 private:
  std::unordered_map<std::string, int> op_map_;
};
} // namespace mace
#endif // MACE_HEXAGON_NN_OPS_H_
/*
* You probably want to
*
* ## ##### #####
* # # # # # #
* # # # # # #
* ###### # # # #
* # # # # # #
* # # ##### #####
*
*
* # # #### ##### ###### ####
* ## # # # # # # #
* # # # # # # # ##### ####
* # # # # # # # # #
* # ## # # # # # # #
* # # #### ##### ###### ####
*
*
* ## #####
* # # #
* # # #
* ###### #
* # # #
* # # #
*
*
* ##### # # ######
* # # # #
* # ###### #####
* # # # #
* # # # #
* # # # ######
*
*
* ###### # # #####
* # ## # # #
* ##### # # # # #
* # # # # # #
* # # ## # #
* ###### # # #####
*
* otherwise the interface becomes incompatible.
*/
DEF_OP(INPUT)
DEF_OP(OUTPUT)
DEF_OP(Nop)
DEF_OP(Const)
DEF_OP(Check)
DEF_OP(Close_f)
DEF_OP(Close_quint8)
DEF_OP(Close_q_quint8)
DEF_OP(Close_int32)
DEF_OP(Close_qint32)
DEF_OP(PPrint_8)
DEF_OP(PPrint_32)
DEF_OP(PPrint_f)
DEF_OP(PreFree)
DEF_OP(Flatten)
#ifndef DEF_OP_WREF
#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref)
#define __SELF_DEF_OP_WREF
#endif
DEF_OP_WREF(QuantizedConv2d_8x8to32)
DEF_OP_WREF(QuantizedMatMul_8x8to32)
DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8)
DEF_OP_WREF(QuantizedRelu_8)
DEF_OP_WREF(QuantizedReluX_8)
DEF_OP_WREF(QuantizedMaxPool_8)
DEF_OP_WREF(QuantizedAvgPool_8)
DEF_OP_WREF(QuantizedConcat_8)
DEF_OP_WREF(QuantizedBiasAdd_8p8to32)
DEF_OP_WREF(Min_f)
DEF_OP_WREF(Max_f)
DEF_OP_WREF(Quantize)
DEF_OP_WREF(Dequantize)
DEF_OP_WREF(Supernode_8x8p8to8)
DEF_OP(QuantizedFlatten)
DEF_OP(Softmax_f)
DEF_OP(Conv2d_f)
DEF_OP(MatMul_f)
DEF_OP(Relu_f)
DEF_OP(ReluX_f)
DEF_OP(AvgPool_f)
DEF_OP(MaxPool_f)
DEF_OP(Concat_f)
DEF_OP(BiasAdd_f)
DEF_OP(LRN_f)
DEF_OP(Variable)
DEF_OP(Assign)
DEF_OP(Reshape)
DEF_OP(QuantizedReshape)
DEF_OP(Tanh_f)
DEF_OP(Sigmoid_f)
DEF_OP(Slice_8)
DEF_OP(Slice_f)
DEF_OP(QuantizedSlice_8)
DEF_OP(Add_f)
DEF_OP(Mul_f)
DEF_OP(Minimum_f)
DEF_OP(Maximum_f)
DEF_OP_WREF(Requantize_32to8)
DEF_OP_WREF(RequantizationRange_32)
DEF_OP(Neg_f)
DEF_OP(Sub_f)
DEF_OP(AddN_f)
DEF_OP(Range_int32)
DEF_OP(Rank_int32)
DEF_OP(Transpose_int32)
DEF_OP(Transpose_f)
DEF_OP(InstanceNorm_f)
DEF_OP_WREF(QuantizedInstanceNorm_8)
DEF_OP(Sub_int32)
DEF_OP(Add_int32)
DEF_OP(Split_f)
DEF_OP(Dequantize_qint32_f)
DEF_OP(PRelu_f)
DEF_OP_WREF(QuantizedPRelu_8)
DEF_OP(Sum_f)
DEF_OP(Prod_f)
DEF_OP(Mul_int32)
DEF_OP(LogicalAnd_int32)
DEF_OP(LogicalOr_int32)
DEF_OP(LogicalXor_int32)
DEF_OP(Shape_int32)
DEF_OP(Pack_int32)
DEF_OP(MirrorPad_f)
DEF_OP(ResizeNearestNeighbor_f)
DEF_OP(StridedSlice_int32)
DEF_OP(StridedSlice_f)
DEF_OP(ExpandDims_int32)
DEF_OP(ExpandDims_f)
DEF_OP(LogSoftmax_f)
DEF_OP(Split_int32)
DEF_OP(QuantizedSplit_8)
DEF_OP(Deconv_f)
DEF_OP_WREF(QuantizedDeconv_8x8to32)
DEF_OP_WREF(QuantizedMul_8x8to32)
DEF_OP_WREF(QuantizedAdd_8p8to32)
DEF_OP_WREF(QuantizedSigmoid_8)
DEF_OP_WREF(QuantizedTanh_8)
DEF_OP_WREF(QuantizedSoftmax_8)
DEF_OP_WREF(QuantizedLRN_8)
DEF_OP_WREF(QuantizedSub_8p8to32)
DEF_OP_WREF(QuantizedMaximum_8)
DEF_OP_WREF(QuantizedMinimum_8)
DEF_OP(Pad_f)
DEF_OP(SpaceToBatchND_f)
DEF_OP(BatchToSpaceND_f)
DEF_OP(QuantizedSpaceToBatchND_8)
DEF_OP(QuantizedBatchToSpaceND_8)
DEF_OP(QuantizedPad_8)
DEF_OP(ResizeBilinear_f)
DEF_OP(QuantizedResizeBilinear_8)
DEF_OP(ConcatV2_f)
DEF_OP(ConcatV2_int32)
DEF_OP(Prod_int32)
DEF_OP(Slice_int32)
DEF_OP(QuantizedAdd_8p8to8)
DEF_OP_WREF(AutoQuantize)
DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32)
DEF_OP(DepthwiseConv2d_f)
#ifdef __SELF_DEF_OP_WREF
#undef __SELF_DEF_OP_WREF
#undef DEF_OP_WREF
#endif
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "gtest/gtest.h"
#include <math.h>
using namespace mace;
// Builds a one-op NetDef exercising QuantizedAdd_8p8to8 on the DSP:
//   input_node (node 0, uint8 [1,1,1,3]) + add_num (const uint8 tensor)
// Quantization ranges for both operands are supplied as const float scalar
// tensors.  Node ids: 0 = input, 1 = add, 2 = output; const tensors use
// node ids 10-16.  Node inputs mirror the string inputs, wired by
// (node_id, output_port).
static NetDef BuildNetDef() {
NetDef net;
net.set_name("quantized_add_test");
// input op
OperatorDef *input_op = net.add_op();
input_op->set_name("input_node");
input_op->set_type("INPUT");
input_op->set_node_id(0);
input_op->set_padding(0);
input_op->add_out_max_byte_size(1000);
// add op: inputs are (data, data, min, max, min, max); outputs are
// (quantized values, output_min, output_max)
OperatorDef *add_op = net.add_op();
add_op->set_name("add");
add_op->set_type("QuantizedAdd_8p8to8");
add_op->set_node_id(1);
add_op->set_padding(0);
add_op->add_input("input_node");
add_op->add_input("add_num");
add_op->add_input("input_min");
add_op->add_input("input_max");
add_op->add_input("add_num_min");
add_op->add_input("add_num_max");
add_op->add_output("add:0");
add_op->add_output("add:1");
add_op->add_output("add:2");
NodeInput *input_node_input = add_op->add_node_input();
input_node_input->set_node_id(0);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(10);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(11);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(12);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(13);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(14);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(15);
input_node_input->set_output_port(0);
input_node_input = add_op->add_node_input();
input_node_input->set_node_id(16);
input_node_input->set_output_port(0);
add_op->add_out_max_byte_size(1000);
add_op->add_out_max_byte_size(1000);
add_op->add_out_max_byte_size(1000);
// output op
OperatorDef *output_op = net.add_op();
output_op->set_name("__output__");
output_op->set_type("OUTPUT");
output_op->set_op_id(2);
input_node_input = output_op->add_node_input();
input_node_input->set_node_id(1);
input_node_input->set_output_port(0);
// tensor
TensorProto *add_num_tensor = net.add_tensors();
add_num_tensor->set_name("add_num");
add_num_tensor->add_dims(3);
add_num_tensor->set_data_type(DataType::DT_UINT8);
add_num_tensor->set_node_id(10);
// uint8 payloads are carried in int32_data
add_num_tensor->add_int32_data(0);
add_num_tensor->add_int32_data(127);
add_num_tensor->add_int32_data(255);
TensorProto *input_min_tensor = net.add_tensors();
input_min_tensor->set_name("input_min");
input_min_tensor->add_dims(1);
input_min_tensor->set_data_type(DataType::DT_FLOAT);
input_min_tensor->set_node_id(11);
input_min_tensor->add_float_data(-100);
TensorProto *input_max_tensor = net.add_tensors();
input_max_tensor->set_name("input_max");
input_max_tensor->add_dims(1);
input_max_tensor->set_data_type(DataType::DT_FLOAT);
input_max_tensor->set_node_id(12);
input_max_tensor->add_float_data(50.0);
TensorProto *add_num_min_tensor = net.add_tensors();
add_num_min_tensor->set_name("add_num_min");
add_num_min_tensor->add_dims(1);
add_num_min_tensor->set_data_type(DataType::DT_FLOAT);
add_num_min_tensor->set_node_id(13);
add_num_min_tensor->add_float_data(0);
TensorProto *add_num_max_tensor = net.add_tensors();
add_num_max_tensor->set_name("add_num_max");
add_num_max_tensor->add_dims(1);
add_num_max_tensor->set_data_type(DataType::DT_FLOAT);
add_num_max_tensor->set_node_id(14);
add_num_max_tensor->add_float_data(100.0);
// Requested output range is left open (±inf) — presumably this lets the
// op pick the actual output range itself; confirm against the DSP op docs.
TensorProto *output_min_tensor = net.add_tensors();
output_min_tensor->set_name("output_min");
output_min_tensor->add_dims(1);
output_min_tensor->set_data_type(DataType::DT_FLOAT);
output_min_tensor->set_node_id(15);
output_min_tensor->add_float_data(-INFINITY);
TensorProto *output_max_tensor = net.add_tensors();
output_max_tensor->set_name("output_max");
output_max_tensor->add_dims(1);
output_max_tensor->set_data_type(DataType::DT_FLOAT);
output_max_tensor->set_node_id(16);
output_max_tensor->add_float_data(INFINITY);
// input & output info
InputInfo *input_info = net.add_input_info();
input_info->set_name("input_node");
input_info->set_node_id(0);
input_info->add_dims(1);
input_info->add_dims(1);
input_info->add_dims(1);
input_info->add_dims(3);
input_info->set_data_type(DataType::DT_UINT8);
input_info->set_max_byte_size(1000);
OutputInfo *output_info = net.add_output_info();
output_info->set_name("output_node");
output_info->set_node_id(1);
output_info->add_dims(1);
output_info->add_dims(1);
output_info->add_dims(1);
output_info->add_dims(3);
output_info->set_data_type(DataType::DT_UINT8);
output_info->set_max_byte_size(1000);
return net;
}
// End-to-end check of QuantizedAdd_8p8to8: runs the graph from
// BuildNetDef() on the DSP and compares the requantized sum against
// precomputed expectations.
TEST(QuantizedAddTest, QuantizedAdd) {
  testing::internal::LogToStderr();
  HexagonControlWrapper wrapper;
  wrapper.Init();
  wrapper.SetDebugLevel(10);
  wrapper.Config();

  NetDef net_def = BuildNetDef();
  wrapper.SetupGraph(net_def);

  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);
  Tensor in_tensor(allocator, DT_UINT8);
  Tensor out_tensor(allocator, DT_UINT8);
  in_tensor.Resize({1, 1, 1, 3});
  out_tensor.Resize({1, 1, 1, 3});
  uint8_t *in_data = in_tensor.mutable_data<uint8_t>();
  const uint8_t *out_data = out_tensor.data<uint8_t>();

  // [-100.0 0 50] + [0.0, 50.0, 100.0] = [-100.0, 50.0, 150.0]
  // s=0.5859, q0=170, [0, 170, 255]
  // s=0.3906, q0=0, [0, 127, 255]
  in_data[0] = 0;
  in_data[1] = 170;
  in_data[2] = 250;

  VLOG(0) << wrapper.ExecuteGraph(in_tensor, &out_tensor);
  wrapper.PrintLog();

  // -120.0~176.47, [17, 146, 229]
  vector<uint8_t> expected {17, 146, 229};
  for (int i = 0; i < out_tensor.size(); ++i) {
    EXPECT_EQ(expected[i], out_data[i]);
  }

  VLOG(0) << wrapper.TeardownGraph();
  wrapper.Finalize();
}
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "mace/dsp/util/quantize.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/kernels/pooling.h"
#include "gtest/gtest.h"
using namespace mace;
// Builds a one-op NetDef exercising QuantizedMaxPool_8 on the DSP:
//   input_node(0) -> maxpool(1) -> __output__(2)
// Const tensors (node ids 10-13) carry input_min, input_max, ksize and
// stride.  `padding` maps to the DSP padding enum: SAME -> 1, VALID -> 2.
// FIX: the node input for node_id 11 (input_max) was missing its
// set_output_port(0) call — every node input sets the producer's output
// port explicitly, matching the rest of this builder.
static NetDef BuildNetDef(const vector<index_t> &input_shape,
                          const vector<index_t> &output_shape,
                          const vector<index_t> &filter_shape,
                          const vector<int> &stride,
                          Padding padding,
                          float input_min, float input_max) {
  NetDef net;
  net.set_name("quantized_maxpool_test");
  // input op
  OperatorDef *input_op = net.add_op();
  input_op->set_name("input_node");
  input_op->set_type("INPUT");
  input_op->set_node_id(0);
  input_op->set_padding(0);
  input_op->add_out_max_byte_size(1000);
  // maxpool op: inputs (data, min, max, ksize, stride), outputs
  // (quantized values, output_min, output_max)
  OperatorDef *maxpool_op = net.add_op();
  maxpool_op->set_name("maxpool");
  maxpool_op->set_type("QuantizedMaxPool_8");
  maxpool_op->set_node_id(1);
  if (padding == Padding::SAME) {
    maxpool_op->set_padding(1);
  } else {
    maxpool_op->set_padding(2);
  }
  maxpool_op->add_input("input_node");
  maxpool_op->add_input("input_min");
  maxpool_op->add_input("input_max");
  maxpool_op->add_input("ksize");
  maxpool_op->add_input("stride");
  maxpool_op->add_output("maxpool:0");
  maxpool_op->add_output("maxpool:1");
  maxpool_op->add_output("maxpool:2");
  NodeInput *input_node_input = maxpool_op->add_node_input();
  input_node_input->set_node_id(0);
  input_node_input->set_output_port(0);
  input_node_input = maxpool_op->add_node_input();
  input_node_input->set_node_id(10);
  input_node_input->set_output_port(0);
  input_node_input = maxpool_op->add_node_input();
  input_node_input->set_node_id(11);
  input_node_input->set_output_port(0);  // was missing (merge artifact)
  input_node_input = maxpool_op->add_node_input();
  input_node_input->set_node_id(12);
  input_node_input->set_output_port(0);
  input_node_input = maxpool_op->add_node_input();
  input_node_input->set_node_id(13);
  input_node_input->set_output_port(0);
  maxpool_op->add_out_max_byte_size(1000);
  maxpool_op->add_out_max_byte_size(1000);
  maxpool_op->add_out_max_byte_size(1000);
  // output op
  OperatorDef *output_op = net.add_op();
  output_op->set_name("__output__");
  output_op->set_type("OUTPUT");
  output_op->set_op_id(2);
  input_node_input = output_op->add_node_input();
  input_node_input->set_node_id(1);
  input_node_input->set_output_port(0);
  // tensor
  TensorProto *input_min_tensor = net.add_tensors();
  input_min_tensor->set_name("input_min");
  input_min_tensor->add_dims(1);
  input_min_tensor->set_data_type(DataType::DT_FLOAT);
  input_min_tensor->set_node_id(10);
  input_min_tensor->add_float_data(input_min);
  TensorProto *input_max_tensor = net.add_tensors();
  input_max_tensor->set_name("input_max");
  input_max_tensor->add_dims(1);
  input_max_tensor->set_data_type(DataType::DT_FLOAT);
  input_max_tensor->set_node_id(11);
  input_max_tensor->add_float_data(input_max);
  // ksize/stride are shape-only const tensors (no payload needed).
  TensorProto *ksize_tensor = net.add_tensors();
  ksize_tensor->set_name("ksize");
  ksize_tensor->add_dims(filter_shape[0]);
  ksize_tensor->add_dims(filter_shape[1]);
  ksize_tensor->add_dims(filter_shape[2]);
  ksize_tensor->add_dims(filter_shape[3]);
  ksize_tensor->set_data_type(DataType::DT_INT32);
  ksize_tensor->set_node_id(12);
  TensorProto *stride_tensor = net.add_tensors();
  stride_tensor->set_name("stride");
  stride_tensor->add_dims(stride[0]);
  stride_tensor->add_dims(stride[1]);
  stride_tensor->add_dims(stride[2]);
  stride_tensor->add_dims(stride[3]);
  stride_tensor->set_data_type(DataType::DT_INT32);
  stride_tensor->set_node_id(13);
  // input & output info
  InputInfo *input_info = net.add_input_info();
  input_info->set_name("input_node");
  input_info->set_node_id(0);
  input_info->add_dims(input_shape[0]);
  input_info->add_dims(input_shape[1]);
  input_info->add_dims(input_shape[2]);
  input_info->add_dims(input_shape[3]);
  input_info->set_data_type(DataType::DT_UINT8);
  input_info->set_max_byte_size(1000);
  OutputInfo *output_info = net.add_output_info();
  output_info->set_name("output_node");
  output_info->set_node_id(1);
  output_info->add_dims(output_shape[0]);
  output_info->add_dims(output_shape[1]);
  output_info->add_dims(output_shape[2]);
  output_info->add_dims(output_shape[3]);
  output_info->set_data_type(DataType::DT_UINT8);
  output_info->set_max_byte_size(1000);
  return net;
}
// Runs QuantizedMaxPool_8 on the DSP with a random 1x10x10x3 input, then
// dequantizes the result and dumps input/output channel-by-channel for
// manual inspection.  No numeric assertions; failures surface via graph
// setup/execution errors.
// FIX: `kernel_size` was previously ignored — the filter shape was
// hard-coded to 3x3 (identical for existing callers, which all pass 3).
// Also removed unused locals (`dilation`, `padding_size`, `output_data`).
static void TestQuantizedMaxPool(Padding padding, int kernel_size, int stride_size) {
  testing::internal::LogToStderr();
  HexagonControlWrapper wrapper;
  wrapper.Init();
  wrapper.SetDebugLevel(3);
  wrapper.Config();

  vector<index_t> input_shape {1, 10, 10, 3};
  // ksize/stride follow the NHWC layout [1, h, w, 1].
  vector<index_t> filter_shape {1, kernel_size, kernel_size, 1};
  vector<int> stride {1, stride_size, stride_size, 1};
  vector<index_t> output_shape {input_shape[0], 0, 0, input_shape[3]};
  switch (padding) {
    case VALID:
      output_shape[1] = (input_shape[1] - filter_shape[1]) / stride[1] + 1;
      output_shape[2] = (input_shape[2] - filter_shape[2]) / stride[2] + 1;
      break;
    case SAME:
      output_shape[1] = (input_shape[1] - 1) / stride[1] + 1;
      output_shape[2] = (input_shape[2] - 1) / stride[2] + 1;
      break;
    default:
      ASSERT_TRUE(0);
  }
  for (int i = 0; i < 4; ++i) {
    VLOG(0) << "! shape = " << output_shape[i];
  }

  NetDef net = BuildNetDef(input_shape, output_shape, filter_shape, stride,
                           padding, -50, 100);
  VLOG(0) << wrapper.SetupGraph(net);

  Allocator *cpu_allocator = GetDeviceAllocator(DeviceType::CPU);
  Tensor original_tensor(cpu_allocator, DT_FLOAT);
  Tensor input_tensor(cpu_allocator, DT_UINT8);
  Tensor output_tensor(cpu_allocator, DT_UINT8);
  Tensor dequantized_output_tensor(cpu_allocator, DT_FLOAT);
  original_tensor.Resize(input_shape);
  input_tensor.Resize(input_shape);
  output_tensor.Resize(output_shape);
  dequantized_output_tensor.Resize(output_shape);
  float *original_data = original_tensor.mutable_data<float>();
  float *dequantized_output_data = dequantized_output_tensor.mutable_data<float>();

  // Random normal float input, quantized to uint8 for the DSP.
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<float> nd(-50, 50);
  std::generate(original_data, original_data + original_tensor.size(),
                [&gen, &nd] { return nd(gen); });
  Quantizer quantizer;
  float min_in, min_out;
  quantizer.Quantize(original_tensor, &input_tensor, &min_in, &min_out);

  VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
  quantizer.DeQuantize(output_tensor, min_in, min_out, &dequantized_output_tensor);

  // debug original float input data
  for (index_t c = 0; c < input_shape[3]; ++c) {
    for (index_t i = 0; i < input_shape[1]; ++i) {
      for (index_t j = 0; j < input_shape[2]; ++j) {
        std::cout << original_data[i * input_shape[2] * input_shape[3] + j * input_shape[3] + c] << " ";
      }
      std::cout << std::endl;
    }
    std::cout << std::endl << std::endl;
  }
  // debug dequantized float output data
  for (index_t c = 0; c < output_shape[3]; ++c) {
    for (index_t i = 0; i < output_shape[1]; ++i) {
      for (index_t j = 0; j < output_shape[2]; ++j) {
        std::cout << dequantized_output_data[i * output_shape[2] * output_shape[3] + j * output_shape[3] + c] << " ";
      }
      std::cout << std::endl;
    }
    std::cout << std::endl << std::endl;
  }

  wrapper.PrintLog();
  VLOG(0) << wrapper.TeardownGraph();
  wrapper.Finalize();
}
// Exercise every padding/stride combination with a 3x3 kernel.
TEST(QuantizedMaxPoolTest, QuantizedMaxPoolValidStride1) {
TestQuantizedMaxPool(Padding::VALID, 3, 1);
}
TEST(QuantizedMaxPoolTest, QuantizedMaxPoolValidStride2) {
TestQuantizedMaxPool(Padding::VALID, 3, 2);
}
TEST(QuantizedMaxPoolTest, QuantizedMaxPoolSameStride1) {
TestQuantizedMaxPool(Padding::SAME, 3, 1);
}
TEST(QuantizedMaxPoolTest, QuantizedMaxPoolSameStride2) {
TestQuantizedMaxPool(Padding::SAME, 3, 2);
}
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "gtest/gtest.h"
using namespace mace;
// Builds a one-op NetDef exercising QuantizedRelu_8 on the DSP:
//   input_node (node 0, uint8 [1,1,1,5]) -> relu (node 1) -> output (2)
// Input quantization range [-100, 100] is supplied via const float scalar
// tensors with node ids 10 and 11.
static NetDef BuildNetDef() {
NetDef net;
net.set_name("quantized_relu_test");
// input op
OperatorDef *input_op = net.add_op();
input_op->set_name("input_node");
input_op->set_type("INPUT");
input_op->set_node_id(0);
input_op->set_padding(0);
input_op->add_out_max_byte_size(1000);
// relu op: inputs (data, min, max); outputs (values, out_min, out_max)
OperatorDef *relu_op = net.add_op();
relu_op->set_name("relu");
relu_op->set_type("QuantizedRelu_8");
relu_op->set_node_id(1);
relu_op->set_padding(0);
relu_op->add_input("input_node");
relu_op->add_input("input_min");
relu_op->add_input("input_max");
relu_op->add_output("relu:0");
relu_op->add_output("relu:1");
relu_op->add_output("relu:2");
// Node inputs mirror the string inputs above, wired by (node_id, port).
NodeInput *input_node_input = relu_op->add_node_input();
input_node_input->set_node_id(0);
input_node_input->set_output_port(0);
input_node_input = relu_op->add_node_input();
input_node_input->set_node_id(10);
input_node_input->set_output_port(0);
input_node_input = relu_op->add_node_input();
input_node_input->set_node_id(11);
input_node_input->set_output_port(0);
relu_op->add_out_max_byte_size(1000);
relu_op->add_out_max_byte_size(1000);
relu_op->add_out_max_byte_size(1000);
// output op
OperatorDef *output_op = net.add_op();
output_op->set_name("__output__");
output_op->set_type("OUTPUT");
output_op->set_op_id(2);
input_node_input = output_op->add_node_input();
input_node_input->set_node_id(1);
input_node_input->set_output_port(0);
// tensor
TensorProto *input_min_tensor = net.add_tensors();
input_min_tensor->set_name("input_min");
input_min_tensor->add_dims(1);
input_min_tensor->set_data_type(DataType::DT_FLOAT);
input_min_tensor->set_node_id(10);
input_min_tensor->add_float_data(-100.0);
TensorProto *input_max_tensor = net.add_tensors();
input_max_tensor->set_name("input_max");
input_max_tensor->add_dims(1);
input_max_tensor->set_data_type(DataType::DT_FLOAT);
input_max_tensor->set_node_id(11);
input_max_tensor->add_float_data(100.0);
// input & output info
InputInfo *input_info = net.add_input_info();
input_info->set_name("input_node");
input_info->set_node_id(0);
input_info->add_dims(1);
input_info->add_dims(1);
input_info->add_dims(1);
input_info->add_dims(5);
input_info->set_data_type(DataType::DT_UINT8);
input_info->set_max_byte_size(1000);
OutputInfo *output_info = net.add_output_info();
output_info->set_name("output_node");
output_info->set_node_id(1);
output_info->add_dims(1);
output_info->add_dims(1);
output_info->add_dims(1);
output_info->add_dims(5);
output_info->set_data_type(DataType::DT_UINT8);
output_info->set_max_byte_size(1000);
return net;
}
// End-to-end check of QuantizedRelu_8: five quantized values spanning
// [-100, 100] are clamped at the zero point (quantized 128) by relu.
TEST(QuantizedReluTest, QuantizedRelu) {
  testing::internal::LogToStderr();
  HexagonControlWrapper wrapper;
  wrapper.Init();
  wrapper.SetDebugLevel(3);
  wrapper.Config();

  NetDef net_def = BuildNetDef();
  wrapper.SetupGraph(net_def);

  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);
  Tensor in_tensor(allocator, DT_UINT8);
  Tensor out_tensor(allocator, DT_UINT8);
  in_tensor.Resize({1, 1, 1, 5});
  out_tensor.Resize({1, 1, 1, 5});
  uint8_t *in_data = in_tensor.mutable_data<uint8_t>();
  const uint8_t *out_data = out_tensor.data<uint8_t>();

  // -100.0 -50.0 0 50.0 100.0 -> s=0.782, q0=int(-fmin/s)=128,
  // q=q0+f/s -> 0, 64, 128, 192, 256
  const uint8_t quantized_inputs[] = {0, 64, 128, 192, 255};
  for (int i = 0; i < 5; ++i) {
    in_data[i] = quantized_inputs[i];
  }

  VLOG(0) << wrapper.ExecuteGraph(in_tensor, &out_tensor);
  wrapper.PrintLog();

  // 0, 0, 0, 50, 100 -> s=0.782, q0=128
  // q -> 128, 128, 128, 192, 255
  vector<uint8_t> expected {128, 128, 128, 192, 255};
  for (int i = 0; i < out_tensor.size(); ++i) {
    EXPECT_EQ(expected[i], out_data[i]);
  }

  VLOG(0) << wrapper.TeardownGraph();
  wrapper.Finalize();
}
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "gtest/gtest.h"
#define RESIZE_BILINEAR_TEST_CHANNELS 128
using namespace mace;
// Builds a one-op NetDef exercising QuantizedResizeBilinear_8 on the DSP:
// resizes a [1,3,3,C] uint8 input to [1,2,2,C] (C =
// RESIZE_BILINEAR_TEST_CHANNELS).  The target spatial size is passed via
// the const int32 "new_dim" tensor; the input quantization range
// [-100, 100] via const float scalars (node ids 11-12).
static NetDef BuildNetDef() {
std::cout << "Building net def" << std::endl;
NetDef net;
net.set_name("quantized_resize_bilinear_test");
// input op
OperatorDef *input_op = net.add_op();
input_op->set_name("input_node");
input_op->set_type("INPUT");
input_op->set_node_id(0);
input_op->set_padding(0);
input_op->add_out_max_byte_size(1200);
// resize-bilinear op (NOTE(review): op name string is "relu" — harmless
// but misleading; kept as-is since graph wiring is by node id)
OperatorDef *resize_bilinear_op = net.add_op();
resize_bilinear_op->set_name("relu");
resize_bilinear_op->set_type("QuantizedResizeBilinear_8");
resize_bilinear_op->set_node_id(1);
resize_bilinear_op->set_padding(0);
resize_bilinear_op->add_input("input_node");
resize_bilinear_op->add_input("new_dim");
resize_bilinear_op->add_input("input_min");
resize_bilinear_op->add_input("input_max");
resize_bilinear_op->add_output("resize_bilinear:0");
resize_bilinear_op->add_output("resize_bilinear:1");
resize_bilinear_op->add_output("resize_bilinear:2");
NodeInput *input_node_input = resize_bilinear_op->add_node_input();
input_node_input->set_node_id(0);
input_node_input->set_output_port(0);
input_node_input = resize_bilinear_op->add_node_input();
input_node_input->set_node_id(10);
input_node_input->set_output_port(0);
input_node_input = resize_bilinear_op->add_node_input();
input_node_input->set_node_id(11);
input_node_input->set_output_port(0);
input_node_input = resize_bilinear_op->add_node_input();
input_node_input->set_node_id(12);
input_node_input->set_output_port(0);
resize_bilinear_op->add_out_max_byte_size(1200);
resize_bilinear_op->add_out_max_byte_size(1000);
resize_bilinear_op->add_out_max_byte_size(1000);
// output op
OperatorDef *output_op = net.add_op();
output_op->set_name("__output__");
output_op->set_type("OUTPUT");
output_op->set_op_id(2);
input_node_input = output_op->add_node_input();
input_node_input->set_node_id(1);
input_node_input->set_output_port(0);
// tensor
TensorProto *new_dim_tensor = net.add_tensors();
new_dim_tensor->set_name("new_dim");
new_dim_tensor->add_dims(2);
new_dim_tensor->set_data_type(DataType::DT_INT32);
new_dim_tensor->set_node_id(10);
new_dim_tensor->add_int32_data(2);
new_dim_tensor->add_int32_data(2);
TensorProto *input_min_tensor = net.add_tensors();
input_min_tensor->set_name("input_min");
input_min_tensor->add_dims(1);
input_min_tensor->set_data_type(DataType::DT_FLOAT);
input_min_tensor->set_node_id(11);
input_min_tensor->add_float_data(-100.0);
TensorProto *input_max_tensor = net.add_tensors();
input_max_tensor->set_name("input_max");
input_max_tensor->add_dims(1);
input_max_tensor->set_data_type(DataType::DT_FLOAT);
input_max_tensor->set_node_id(12);
input_max_tensor->add_float_data(100.0);
// input & output info
InputInfo *input_info = net.add_input_info();
input_info->set_name("input_node");
input_info->set_node_id(0);
input_info->add_dims(1);
input_info->add_dims(3);
input_info->add_dims(3);
input_info->add_dims(RESIZE_BILINEAR_TEST_CHANNELS);
input_info->set_data_type(DataType::DT_UINT8);
input_info->set_max_byte_size(1200);
OutputInfo *output_info = net.add_output_info();
output_info->set_name("output_node");
output_info->set_node_id(1);
output_info->add_dims(1);
output_info->add_dims(2);
output_info->add_dims(2);
output_info->add_dims(RESIZE_BILINEAR_TEST_CHANNELS);
output_info->set_data_type(DataType::DT_UINT8);
output_info->set_max_byte_size(1200);
return net;
}
// End-to-end check of QuantizedResizeBilinear_8: a 3x3 spatial ramp
// (identical across channels) is downsized to 2x2 and compared per channel.
TEST(QuantizedResizeBilinearTest, QuantizedResizeBilinear) {
  testing::internal::LogToStderr();
  HexagonControlWrapper wrapper;
  wrapper.Init();
  wrapper.SetDebugLevel(3);
  wrapper.Config();

  NetDef net_def = BuildNetDef();
  wrapper.SetupGraph(net_def);

  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);
  Tensor in_tensor(allocator, DT_UINT8);
  Tensor out_tensor(allocator, DT_UINT8);
  in_tensor.Resize({1, 3, 3, RESIZE_BILINEAR_TEST_CHANNELS});
  out_tensor.Resize({1, 2, 2, RESIZE_BILINEAR_TEST_CHANNELS});
  uint8_t *in_data = in_tensor.mutable_data<uint8_t>();
  const uint8_t *out_data = out_tensor.data<uint8_t>();

  // Fill every channel of spatial cell `wh` with 9 - wh.
  for (int wh = 0; wh < 9; ++wh) {
    for (int c = 0; c < RESIZE_BILINEAR_TEST_CHANNELS; ++c) {
      in_data[wh * RESIZE_BILINEAR_TEST_CHANNELS + c] = 9 - wh;
    }
  }

  VLOG(0) << wrapper.ExecuteGraph(in_tensor, &out_tensor);
  wrapper.PrintLog();

  vector<uint8_t> expected {9, 8, 5, 3};
  for (int i = 0; i < 4; ++i) {
    for (int c = 0; c < RESIZE_BILINEAR_TEST_CHANNELS; ++c) {
      EXPECT_EQ(expected[i], out_data[i * RESIZE_BILINEAR_TEST_CHANNELS + c]);
    }
  }
  std::cout << std::endl;

  VLOG(0) << wrapper.TeardownGraph();
  wrapper.Finalize();
}
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/hexagon_control_wrapper.h"
#include "gtest/gtest.h"
#include <math.h>
using namespace mace;
// Builds a one-op NetDef exercising Supernode_8x8p8to8 (fused quantized
// conv + bias + requantize) on the DSP: 4x4x1 input, 2x2 filter, stride 2.
// Const tensors use node ids 10-20; the op's quantization ranges and the
// requested output clamp (min_val/max_val = ±inf) are const float scalars.
// FIX: the stride and bias tensors were registered under the names
// "stride"/"bias" while the op declares its inputs as
// "stride_tensor"/"bias_tensor" — the names now match.  (Graph wiring by
// node_id was already consistent.)
static NetDef BuildNetDef() {
  NetDef net;
  net.set_name("supernode_test");
  // input op
  OperatorDef *input_op = net.add_op();
  input_op->set_name("input_node");
  input_op->set_type("INPUT");
  input_op->set_node_id(0);
  input_op->set_padding(0);
  input_op->add_out_max_byte_size(1000);
  // supernode op
  OperatorDef *supernode_op = net.add_op();
  supernode_op->set_name("supernode");
  supernode_op->set_type("Supernode_8x8p8to8");
  supernode_op->set_node_id(1);
  supernode_op->set_padding(0);
  supernode_op->add_input("input_node");
  supernode_op->add_input("filter_tensor");
  supernode_op->add_input("input_min");
  supernode_op->add_input("input_max");
  supernode_op->add_input("filter_min");
  supernode_op->add_input("filter_max");
  supernode_op->add_input("stride_tensor");
  supernode_op->add_input("bias_tensor");
  supernode_op->add_input("bias_min");
  supernode_op->add_input("bias_max");
  supernode_op->add_input("min_val");
  supernode_op->add_input("max_val");
  supernode_op->add_output("supernode:0");
  supernode_op->add_output("supernode:1");
  supernode_op->add_output("supernode:2");
  // Node inputs mirror the string inputs above, wired by (node_id, port).
  NodeInput *input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(0);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(10);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(11);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(12);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(13);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(14);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(15);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(16);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(17);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(18);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(19);
  input_node_input->set_output_port(0);
  input_node_input = supernode_op->add_node_input();
  input_node_input->set_node_id(20);
  input_node_input->set_output_port(0);
  supernode_op->add_out_max_byte_size(1000);
  supernode_op->add_out_max_byte_size(1000);
  supernode_op->add_out_max_byte_size(1000);
  // output op
  OperatorDef *output_op = net.add_op();
  output_op->set_name("__output__");
  output_op->set_type("OUTPUT");
  output_op->set_op_id(2);
  input_node_input = output_op->add_node_input();
  input_node_input->set_node_id(1);
  input_node_input->set_output_port(0);
  // tensor
  TensorProto *filter_tensor = net.add_tensors();
  filter_tensor->set_name("filter_tensor");
  filter_tensor->add_dims(2);
  filter_tensor->add_dims(2);
  filter_tensor->add_dims(1);
  filter_tensor->add_dims(1);
  filter_tensor->set_data_type(DataType::DT_UINT8);
  filter_tensor->set_node_id(10);
  // uint8 payloads are carried in int32_data
  filter_tensor->add_int32_data(0);
  filter_tensor->add_int32_data(127);
  filter_tensor->add_int32_data(127);
  filter_tensor->add_int32_data(255);
  TensorProto *input_min_tensor = net.add_tensors();
  input_min_tensor->set_name("input_min");
  input_min_tensor->add_dims(1);
  input_min_tensor->set_data_type(DataType::DT_FLOAT);
  input_min_tensor->set_node_id(11);
  input_min_tensor->add_float_data(-10.0);
  TensorProto *input_max_tensor = net.add_tensors();
  input_max_tensor->set_name("input_max");
  input_max_tensor->add_dims(1);
  input_max_tensor->set_data_type(DataType::DT_FLOAT);
  input_max_tensor->set_node_id(12);
  input_max_tensor->add_float_data(10.0787402);
  TensorProto *filter_min_tensor = net.add_tensors();
  filter_min_tensor->set_name("filter_min");
  filter_min_tensor->add_dims(1);
  filter_min_tensor->set_data_type(DataType::DT_FLOAT);
  filter_min_tensor->set_node_id(13);
  filter_min_tensor->add_float_data(-10.0);
  TensorProto *filter_max_tensor = net.add_tensors();
  filter_max_tensor->set_name("filter_max");
  filter_max_tensor->add_dims(1);
  filter_max_tensor->set_data_type(DataType::DT_FLOAT);
  filter_max_tensor->set_node_id(14);
  filter_max_tensor->add_float_data(10.0787402);
  // stride is a shape-only const tensor [1, 2, 2, 1]
  TensorProto *stride_tensor = net.add_tensors();
  stride_tensor->set_name("stride_tensor");  // was "stride" — must match op input
  stride_tensor->add_dims(1);
  stride_tensor->add_dims(2);
  stride_tensor->add_dims(2);
  stride_tensor->add_dims(1);
  stride_tensor->set_data_type(DataType::DT_INT32);
  stride_tensor->set_node_id(15);
  TensorProto *bias_tensor = net.add_tensors();
  bias_tensor->set_name("bias_tensor");  // was "bias" — must match op input
  bias_tensor->add_dims(1);
  bias_tensor->set_data_type(DataType::DT_UINT8);
  bias_tensor->set_node_id(16);
  bias_tensor->add_int32_data(127);
  TensorProto *bias_min_tensor = net.add_tensors();
  bias_min_tensor->set_name("bias_min");
  bias_min_tensor->add_dims(1);
  bias_min_tensor->set_data_type(DataType::DT_FLOAT);
  bias_min_tensor->set_node_id(17);
  bias_min_tensor->add_float_data(-10.0);
  TensorProto *bias_max_tensor = net.add_tensors();
  bias_max_tensor->set_name("bias_max");
  bias_max_tensor->add_dims(1);
  bias_max_tensor->set_data_type(DataType::DT_FLOAT);
  bias_max_tensor->set_node_id(18);
  bias_max_tensor->add_float_data(10.0787402);
  // Output clamp left open (±inf) — presumably the op derives the actual
  // output range itself; confirm against the DSP op docs.
  TensorProto *min_val_tensor = net.add_tensors();
  min_val_tensor->set_name("min_val");
  min_val_tensor->add_dims(1);
  min_val_tensor->set_data_type(DataType::DT_FLOAT);
  min_val_tensor->set_node_id(19);
  min_val_tensor->add_float_data(-INFINITY);
  TensorProto *max_val_tensor = net.add_tensors();
  max_val_tensor->set_name("max_val");
  max_val_tensor->add_dims(1);
  max_val_tensor->set_data_type(DataType::DT_FLOAT);
  max_val_tensor->set_node_id(20);
  max_val_tensor->add_float_data(INFINITY);
  // input & output info
  InputInfo *input_info = net.add_input_info();
  input_info->set_name("input_node");
  input_info->set_node_id(0);
  input_info->add_dims(1);
  input_info->add_dims(4);
  input_info->add_dims(4);
  input_info->add_dims(1);
  input_info->set_data_type(DataType::DT_UINT8);
  input_info->set_max_byte_size(1000);
  OutputInfo *output_info = net.add_output_info();
  output_info->set_name("output_node");
  output_info->set_node_id(1);
  output_info->add_dims(1);
  output_info->add_dims(2);
  output_info->add_dims(2);
  output_info->add_dims(1);
  output_info->set_data_type(DataType::DT_UINT8);
  output_info->set_max_byte_size(1000);
  return net;
}
// End-to-end check of Supernode_8x8p8to8: a 4x4 quantized ramp convolved
// with a 2x2 filter at stride 2; the requantized 2x2 output is compared
// against precomputed expectations.
TEST(SupernodeTest, Supernode) {
  testing::internal::LogToStderr();
  HexagonControlWrapper wrapper;
  wrapper.Init();
  wrapper.SetDebugLevel(10);
  wrapper.Config();

  NetDef net_def = BuildNetDef();
  wrapper.SetupGraph(net_def);

  Allocator *allocator = GetDeviceAllocator(DeviceType::CPU);
  Tensor in_tensor(allocator, DT_UINT8);
  Tensor out_tensor(allocator, DT_UINT8);
  in_tensor.Resize({1, 4, 4, 1});
  out_tensor.Resize({1, 2, 2, 1});
  uint8_t *in_data = in_tensor.mutable_data<uint8_t>();
  const uint8_t *out_data = out_tensor.data<uint8_t>();

  // input: [[-10, ..], [-5.03937, ..], [0, ..], [5.03937, ..]]
  // filt: [[-10, 0], [0, 10.07874]]
  // bias: 0.0
  for (int h = 0; h < 4; ++h) {
    for (int w = 0; w < 4; ++w) {
      in_data[h * 4 + w] = (uint8_t)((h == 0) ? 0 : h * 64 - 1);
    }
  }

  VLOG(0) << wrapper.ExecuteGraph(in_tensor, &out_tensor);
  wrapper.PrintLog();

  // expect out: [[49.2095, 49.2095], [50.7905, 50.7905]]
  // with output range(-0.5, 64.0), quantize to [[196, 196], [203, 203]]
  vector<uint8_t> expected {196, 196, 203, 203};
  for (int i = 0; i < out_tensor.size(); ++i) {
    EXPECT_EQ(expected[i], out_data[i]);
  }
  std::cout << std::endl;

  VLOG(0) << wrapper.TeardownGraph();
  wrapper.Finalize();
}
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
/**
* Usage:
* mace_dsp_run --model=mobi_mace.pb \
* --input_shape=1,3,224,224 \
* --input_file=input_data \
* --output_file=mace.out
*/
#include <sys/time.h>
#include <fstream>
#include "mace/dsp/hexagon_control_wrapper.h"
#include "mace/core/net.h"
#include "mace/utils/command_line_flags.h"
using namespace std;
using namespace mace;
void ParseShape(const string &str, vector<index_t> *shape) {
string tmp = str;
while (!tmp.empty()) {
int dim = atoi(tmp.data());
shape->push_back(dim);
size_t next_offset = tmp.find(",");
if (next_offset == string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
// Command-line driver: loads a serialized model and a raw float input file,
// runs the graph `round` times on the Hexagon DSP, reports average latency
// in milliseconds, then writes the raw float output to `output_file`.
int main(int argc, char **argv) {
string model_file;
string input_shape;
string input_file;
string output_file;
int round = 1;
std::vector<Flag> flag_list = {
Flag("model", &model_file, "model file name"),
Flag("input_shape", &input_shape, "input shape, separated by comma"),
Flag("input_file", &input_file, "input file name"),
Flag("output_file", &output_file, "output file name"),
Flag("round", &round, "round"),
};
string usage = Flags::Usage(argv[0], flag_list);
const bool parse_result = Flags::Parse(&argc, argv, flag_list);
if (!parse_result) {
LOG(ERROR) << usage;
return -1;
}
VLOG(0) << "model: " << model_file << std::endl
<< "input_shape: " << input_shape << std::endl
<< "input_file: " << input_file << std::endl
<< "output_file: " << output_file << std::endl
<< "round: " << round << std::endl;
vector<index_t> shape;
ParseShape(input_shape, &shape);
// load input
// NOTE(review): no check that in_file opened or that the read was
// complete — a missing/short input file yields garbage data silently.
Tensor input_tensor;
input_tensor.Resize(shape);
float *input_data = input_tensor.mutable_data<float>();
ifstream in_file(input_file, ios::in | ios::binary);
in_file.read(reinterpret_cast<char *>(input_data),
input_tensor.size() * sizeof(float));
in_file.close();
// execute
HexagonControlWrapper wrapper;
VLOG(0) << "version: " << wrapper.GetVersion();
wrapper.Init();
wrapper.SetDebugLevel(0);
wrapper.Config();
VLOG(0) << wrapper.SetupGraph(model_file);
wrapper.PrintGraph();
Tensor output_tensor;
// Time `round` executions with wall-clock time and report the average.
timeval tv1, tv2;
gettimeofday(&tv1, NULL);
for (int i = 0; i < round; ++i) {
VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor);
}
gettimeofday(&tv2, NULL);
cout << "avg duration: "
<< ((tv2.tv_sec - tv1.tv_sec) * 1000 +
(tv2.tv_usec - tv1.tv_usec) / 1000) /
round
<< endl;
wrapper.GetPerfInfo();
wrapper.PrintLog();
VLOG(0) << wrapper.TeardownGraph();
wrapper.Finalize();
// save output
ofstream out_file(output_file, ios::binary);
out_file.write((const char *) (output_tensor.data<float>()),
output_tensor.size() * sizeof(float));
out_file.flush();
out_file.close();
}
\ No newline at end of file
# Description:
# Mace dsp util.
#
# Bazel package for the DSP (Hexagon) utility helpers: builds the :util
# library from every non-test source here, plus a gtest-based test binary.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
# if_android gates Android-only link flags used by the test target below.
load("//mace:mace.bzl", "if_android")
# Utility library: all .cc/.h in this directory except *_test.cc.
cc_library(
name = "util",
srcs = glob([
"*.cc",
], exclude = [
"*_test.cc",
]),
hdrs = glob([
"*.h",
]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
deps = [
"//mace/core:core",
],
)
# Unit tests for :util, linked statically against gtest's main.
cc_test(
name = "util_test",
testonly = 1,
srcs = glob(["*_test.cc"]),
copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
linkopts = if_android([
"-ldl",
"-lm",
]),
linkstatic = 1,
deps = [
"@gtest//:gtest_main",
":util",
],
)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/dsp/util/quantize.h"
#include "gtest/gtest.h"
using namespace mace;
// Round-trips a 5-element tensor through Quantize/DeQuantize over the
// range [-50, 100]: the quantized bytes must match exactly, and the
// recovered floats must be within 1.0 of the originals.
TEST(QuantizeTest, QuantizeAndDequantize) {
  testing::internal::LogToStderr();
  Quantizer quantizer;
  Allocator *cpu_allocator = GetDeviceAllocator(DeviceType::CPU);

  const vector<index_t> tensor_shape{5};
  const float input_values[5] = {-50.0, -10.0, 20.0, 80.0, 100.0};

  Tensor in_tensor(cpu_allocator, DataType::DT_FLOAT);
  in_tensor.Resize(tensor_shape);
  float *in_data = in_tensor.mutable_data<float>();
  for (int i = 0; i < 5; ++i) {
    in_data[i] = input_values[i];
  }

  Tensor quantized_tensor(cpu_allocator, DataType::DT_UINT8);
  quantized_tensor.Resize(tensor_shape);
  uint8_t *quantized_data = quantized_tensor.mutable_data<uint8_t>();
  float min_out, max_out;
  quantizer.Quantize(in_tensor, -50.0, 100.0, &quantized_tensor, &min_out, &max_out);

  const uint8_t expected_quantized[5] = {0, 68, 119, 220, 254};
  for (int i = 0; i < quantized_tensor.size(); ++i) {
    EXPECT_EQ(expected_quantized[i], quantized_data[i]);
  }

  Tensor dequantized_tensor(cpu_allocator, DataType::DT_FLOAT);
  dequantized_tensor.Resize(tensor_shape);
  float *dequantized_data = dequantized_tensor.mutable_data<float>();
  quantizer.DeQuantize(quantized_tensor, min_out, max_out, &dequantized_tensor);
  for (int i = 0; i < dequantized_tensor.size(); ++i) {
    EXPECT_NEAR(in_data[i], dequantized_data[i], 1);
  }
}
......@@ -52,6 +52,8 @@ DeviceType ParseDeviceType(const string &device_str) {
return DeviceType::NEON;
} else if (device_str.compare("OPENCL") == 0) {
return DeviceType::OPENCL;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
return DeviceType::CPU;
}
......@@ -105,9 +107,6 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
}
int main(int argc, char **argv) {
string model_file;
string input_node;
string output_node;
string input_shape;
string output_shape;
string input_file;
......@@ -117,9 +116,6 @@ int main(int argc, char **argv) {
int malloc_check_cycle = -1;
std::vector<Flag> flag_list = {
Flag("model", &model_file, "model file name"),
Flag("input", &input_node, "input node"),
Flag("output", &output_node, "output node"),
Flag("input_shape", &input_shape, "input shape, separated by comma"),
Flag("output_shape", &output_shape, "output shape, separated by comma"),
Flag("input_file", &input_file, "input file name"),
......@@ -140,9 +136,6 @@ int main(int argc, char **argv) {
VLOG(0) << "mace version: " << MaceVersion() << std::endl
<< "mace git version: " << MaceGitVersion() << std::endl
<< "model: " << model_file << std::endl
<< "input: " << input_node << std::endl
<< "output: " << output_node << std::endl
<< "input_shape: " << input_shape << std::endl
<< "output_shape: " << output_shape << std::endl
<< "input_file: " << input_file << std::endl
......
此差异已折叠。
......@@ -9,7 +9,7 @@
namespace {{tag}}{
alignas(4) unsigned char {{ tensor_info.name }}[] = {
{% if tensor_info.data_type != 'DT_UINT8' %} alignas(4) {% endif %} unsigned char {{ tensor_info.name }}[] = {
{% for d in tensor_info.data %}{{"0x%02X, " % d }}{%endfor%}
};
......@@ -32,12 +32,14 @@ void UpdateOp(mace::OperatorDef &op,
const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs,
const std::vector<mace::DataType> &output_types) {
const std::vector<mace::DataType> &output_types,
uint32_t node_id) {
op.set_name(name);
op.set_type(type);
op.set_input(inputs);
op.set_output(outputs);
op.set_output_type(output_types);
op.set_node_id(node_id);
}
}
......@@ -78,13 +80,40 @@ void CreateOperator{{i}}(mace::OperatorDef &op) {
{% endif %}
{% for shape in net.op[i].output_shape %}
{% if shape.dims | length > 0 %}
op.add_output_shape(mace::OutputShape({ {{ shape.dims|join(', ') }} }));
{% endif %}
{% endfor %}
std::vector<int> output_types_int({ {{ net.op[i].output_type | join(', ') }} });
std::vector<mace::DataType> output_types({{ net.op[i].output_type | length }});
for (int k = 0; k < {{ net.op[i].output_type | length }}; ++k) {
output_types[k] = static_cast<mace::DataType>(output_types_int[k]);
}
UpdateOp(op, {{ net.op[i].name|tojson }}, {{ net.op[i].type|tojson}},
{ {{ net.op[i].input|stringfy }} },
{ {{ net.op[i].output|stringfy }} },
{ {{ net.op[i].output_type|join(', ') }} });
output_types,
{{ net.op[i].node_id }});
{% if runtime == 'dsp' %}
op.set_padding({{ net.op[i].padding }});
{% if net.op[i].node_input | length > 0 %}
std::vector<int> input_node_ids({ {{ net.op[i].node_input | map(attribute='node_id') | join(', ') }} });
std::vector<int> input_output_ports({ {{ net.op[i].node_input | map(attribute='output_port') | join(', ')}} });
for (size_t i = 0; i < {{ net.op[i].node_input | length }}; ++i) {
mace::NodeInput input(input_node_ids[i], input_output_ports[i]);
op.add_node_input(input);
}
{% endif %}
{% if net.op[i].out_max_byte_size | length > 0 %}
std::vector<int> out_max_byte_sizes {{ net.op[i].out_max_byte_size | replace('[', '{') | replace(']', '}') }};
for (size_t i = 0; i < {{ net.op[i].out_max_byte_size | length }}; ++i) {
op.add_out_max_byte_size(out_max_byte_sizes[i]);
}
{% endif %}
{% endif %}
}
......@@ -145,10 +174,25 @@ static void CreateNetArg(mace::NetDef &net_def) {
{% endif %}
{% endfor %}
}
{% endif %}
{% if net.output_info | length > 0 %}
static void CreateOutputInfo(mace::NetDef &net_def) {
std::vector<std::vector<int>> dims { {{net.output_info | map(attribute='dims') | join(', ') | replace('[', '{') | replace(']', '}') }} };
std::vector<int> data_types_int { {{ net.output_info | map(attribute='data_type') | join(', ') }} };
std::vector<mace::DataType> data_types({{ net.output_info | length }});
for (int k = 0; k < {{ net.output_info | length }}; ++k) {
data_types[k] = static_cast<mace::DataType>(data_types_int[k]);
}
net_def.mutable_output_info().resize({{ net.output_info | length }});
for (int i = 0; i < {{ net.output_info | length }}; ++i) {
net_def.mutable_output_info()[i].set_data_type(data_types[i]);
net_def.mutable_output_info()[i].set_dims(dims[i]);
}
}
{% endif %}
static void CreateOperators(std::vector<mace::OperatorDef> &ops) {
ops.resize({{ net.op|length }});
......@@ -205,6 +249,10 @@ NetDef {{'Create' + tag}}() {
CreateMemoryArena(net_def.mutable_mem_arena());
{% endif %}
{% if net.output_info | length > 0 %}
CreateOutputInfo(net_def);
{% endif %}
return net_def;
}
......
import struct
import os
import uuid
import numpy as np
from tensorflow import gfile
from mace.proto import mace_pb2
......@@ -74,15 +75,18 @@ def rename_tensor(net_def):
class TensorInfo:
    """Serialized view of a mace tensor proto for code generation.

    Exposes the tensor's name, its data type name, and the raw bytes of
    its payload packed for embedding into generated C++ source.
    """
    def __init__(self, t):
        # t: a mace_pb2 tensor proto carrying name, data_type, and one of
        # float_data / int32_data depending on the dtype.
        self.name = t.name
        self.data_type = mace_pb2.DataType.Name(t.data_type)
        if t.data_type == mace_pb2.DT_FLOAT:
            # little-endian IEEE floats via struct ('%sf' -> e.g. '12f')
            self.data = bytearray(struct.pack('%sf' % len(t.float_data), *t.float_data))
        elif t.data_type == mace_pb2.DT_INT32:
            self.data = bytearray(struct.pack('%si' % len(t.int32_data), *t.int32_data))
        elif t.data_type == mace_pb2.DT_UINT8:
            # uint8 payloads arrive in int32_data and are narrowed here.
            # NOTE(review): any other dtype leaves self.data unset, which
            # would raise AttributeError downstream — presumably only
            # float/int32/uint8 tensors reach this class; confirm.
            self.data = bytearray(np.array(t.int32_data).astype(np.uint8).tolist())
def stringfy(value):
    """Render each element as a double-quoted token, joined by ', '.

    e.g. ["a", "b"] -> '"a", "b"'. Elements are formatted with str(),
    so non-string values are accepted.
    """
    quoted_tokens = ['"{0}"'.format(item) for item in value]
    return ', '.join(quoted_tokens)
def convert_to_source(net_def, template, confuse, model_tag, output):
def convert_to_source(net_def, template, confuse, model_tag, output, runtime):
if confuse:
confuse_name(net_def)
else:
......@@ -106,6 +110,7 @@ def convert_to_source(net_def, template, confuse, model_tag, output):
tensor = t,
tag = model_tag,
mode = 0,
runtime = runtime,
)
with gfile.GFile(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f:
f.write(source)
......@@ -120,7 +125,8 @@ def convert_to_source(net_def, template, confuse, model_tag, output):
end = min(start+10, op_size),
net = net_def,
tag = model_tag,
mode = 1
mode = 1,
runtime = runtime,
)
with gfile.GFile(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
f.write(source)
......@@ -132,7 +138,8 @@ def convert_to_source(net_def, template, confuse, model_tag, output):
tensors = tensors,
net = net_def,
tag = model_tag,
mode = 2
mode = 2,
runtime = runtime,
)
with gfile.GFile(output, "wb") as f:
f.write(source)
......@@ -30,7 +30,7 @@ def main(unused_args):
if FLAGS.output_type == 'source':
source_converter_lib.convert_to_source(output_graph_def, FLAGS.template, FLAGS.confuse,
FLAGS.model_tag, FLAGS.output)
FLAGS.model_tag, FLAGS.output, FLAGS.runtime)
else:
with gfile.GFile(FLAGS.output, "wb") as f:
f.write(output_graph_def.SerializeToString())
......@@ -72,8 +72,8 @@ def parse_args():
parser.add_argument(
"--prequantize",
type=bool,
default=False,
help="e.g., False")
default=True,
help="e.g., True")
parser.add_argument(
"--data_type",
type=str,
......
......@@ -391,6 +391,7 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node, prequantize=Fal
# optimized_net_def = reverse_batch_to_space_and_biasadd(net_def)
if prequantize:
print('Prequantize ...')
net_def = strip_input_quantize_and_output_dequantize(net_def, input_node, output_node)
sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__')
......
......@@ -66,9 +66,6 @@ build_and_run()
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=mace_input_node \
--output=mace_output_node \
--input_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},3"\
--output_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},2"\
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
......@@ -96,7 +93,7 @@ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output_type=source \
--template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
--model_tag=${MODEL_TAG} \
--confuse=True
--confuse=False
echo "Step 3: Generate version source"
rm -rf ${VERSION_SOURCE_PATH}
......
#!/bin/bash
# Must run at root dir of mace project.
# Validates a GCN model on the Hexagon DSP: generates random input data,
# converts the TF model into generated mace source, runs it on a device
# over adb, and compares the device output against TensorFlow.
set +x
Usage() {
  echo 'Usage: bash tools/validate_gcn.sh tools/gcn.config tf_model_path image_size [tuning]'
}

# image_size ($3) is mandatory below, so require at least 3 arguments
# (the old "-lt 2" check let IMAGE_SIZE be silently empty).
if [ $# -lt 3 ];then
  Usage
  exit 1
fi

source $1

VLOG_LEVEL=0
TF_MODEL_FILE_PATH=$2
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd`
MACE_MODEL_NAME='mace_model.pb'
INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
IMAGE_SIZE=$3
MODEL_TAG=GCN${IMAGE_SIZE}
CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE
VERSION_SOURCE_PATH=${CODEGEN_DIR}/version
# Inference iterations for mace_run. Previously $round was never set,
# producing an empty --round= argument. Keep any value supplied by the
# sourced config; default to 1 otherwise (matching mace_run's default).
round=${round:-1}

build_and_run()
{
  bazel build -c opt --strip always mace/examples:mace_run \
    --crosstool_top=//external:android/crosstool \
    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
    --cpu=armeabi-v7a \
    --copt=-DMACE_MODEL_FUNCTION=Create${MODEL_TAG}

  adb shell "mkdir -p ${PHONE_DATA_DIR}"
  adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
  adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}

  adb </dev/null shell \
    MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
    MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
    ${PHONE_DATA_DIR}/mace_run \
    --input_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},3"\
    --output_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},2"\
    --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
    --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
    --device=HEXAGON \
    --round=$round
}

echo "Step 1: Generate input data"
rm -rf ${MODEL_DIR}/${INPUT_FILE_NAME}
python tools/validate.py --generate_data true \
  --input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
  --input_shape="${IMAGE_SIZE},${IMAGE_SIZE},3"

echo "Step 2: Convert tf model to mace model and optimize memory"
bazel build //mace/python/tools:tf_converter
rm -rf ${MODEL_CODEGEN_DIR}
mkdir -p ${MODEL_CODEGEN_DIR}
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
  --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
  --input_node=${TF_INPUT_NODE} \
  --output_node=${TF_OUTPUT_NODE} \
  --data_type=DT_UINT8 \
  --runtime=dsp \
  --output_type=source \
  --template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
  --model_tag=${MODEL_TAG} \
  --confuse=True

echo "Step 3: Generate version source"
rm -rf ${VERSION_SOURCE_PATH}
mkdir -p ${VERSION_SOURCE_PATH}
bash mace/tools/git/gen_version_source.sh ${VERSION_SOURCE_PATH}/version.cc

echo "Step 4: Run model on the phone with files"
build_and_run

echo "Step 5: Pull the mace run result."
rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}

echo "Step 6: Validate the result"
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
  --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
  --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
  --input_node ${TF_INPUT_NODE} \
  --output_node ${TF_OUTPUT_NODE} \
  --input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
  --output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
\ No newline at end of file
......@@ -50,9 +50,6 @@ adb shell MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
OMP_NUM_THREADS=$num_threads \
${PHONE_DATA_DIR}/mace_run \
--model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
--input=input_node \
--output=icnet/Conv_11/BatchNorm/batchnorm/add_1 \
--input_shape=1,3,480,480\
--input_file=${PHONE_DATA_DIR}/${MACE_INPUT_FILE_NAME} \
--output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册