提交 efe8ac50 编写于 作者: L Liangliang He

Merge branch 'tuning-binary' into 'master'

Add MaceEngine API

See merge request !173
...@@ -4,6 +4,9 @@ ...@@ -4,6 +4,9 @@
#include "mace/core/mace.h" #include "mace/core/mace.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
namespace mace { namespace mace {
...@@ -149,6 +152,17 @@ void Argument::set_strings(const std::vector<std::string> &value) { ...@@ -149,6 +152,17 @@ void Argument::set_strings(const std::vector<std::string> &value) {
std::copy(value.begin(), value.end(), strings_.begin()); std::copy(value.begin(), value.end(), strings_.begin());
} }
// Node Input
void NodeInput::CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
}
int NodeInput::node_id() const {
return node_id_;
}
int NodeInput::output_port() const {
return output_port_;
}
// OutputShape // OutputShape
OutputShape::OutputShape() {} OutputShape::OutputShape() {}
...@@ -338,6 +352,51 @@ uint32_t MemoryBlock::y() const { ...@@ -338,6 +352,51 @@ uint32_t MemoryBlock::y() const {
return y_; return y_;
} }
// MemoryArena
const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
return mem_block_;
}
std::vector<MemoryBlock> &MemoryArena::mutable_mem_block() {
return mem_block_;
}
int MemoryArena::mem_block_size() const {
return mem_block_.size();
}
// InputInfo
const std::string &InputInfo::name() const {
return name_;
}
int32_t InputInfo::node_id() const {
return node_id_;
}
int32_t InputInfo::max_byte_size() const {
return max_byte_size_;
}
DataType InputInfo::data_type() const {
return data_type_;
}
const std::vector<int32_t> &InputInfo::dims() const {
return dims_;
}
// OutputInfo
const std::string &OutputInfo::name() const {
return name_;
}
int32_t OutputInfo::node_id() const {
return node_id_;
}
int32_t OutputInfo::max_byte_size() const {
return max_byte_size_;
}
DataType OutputInfo::data_type() const {
return data_type_;
}
const std::vector<int32_t> &OutputInfo::dims() const {
return dims_;
}
// NetDef // NetDef
NetDef::NetDef() : has_bits_(0) {} NetDef::NetDef() : has_bits_(0) {}
...@@ -421,4 +480,49 @@ const OperatorDef &NetDef::op(const int idx) const { ...@@ -421,4 +480,49 @@ const OperatorDef &NetDef::op(const int idx) const {
MACE_CHECK(0 <= idx && idx < op_size()); MACE_CHECK(0 <= idx && idx < op_size());
return op_[idx]; return op_[idx];
} }
// Mace Engine
// Builds an engine for `net_def` on `device_type`.
//
// Steps, in order:
//   1. load the model tensors of `*net_def` into a fresh workspace,
//   2. create a net in NetMode::INIT and run it once (a failed run is logged
//      at FATAL severity),
//   3. create the float input tensor "mace_input_node:0",
//   4. create the net that Run() executes.
//
// `net_def` must not be null; it is only dereferenced during construction by
// this function.
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type)
    : device_type_(device_type), ws_(new Workspace()), net_(nullptr) {
  // Fail with a clear check instead of dereferencing a null pointer below.
  MACE_CHECK(net_def != nullptr);

  ws_->LoadModelTensor(*net_def, device_type);

  // Init model
  auto init_net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT);
  if (!init_net->Run()) {
    LOG(FATAL) << "Net init run failed";
  }

  ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_),
                    DT_FLOAT);

  // CreateNet() already yields a temporary, so it is assigned directly;
  // wrapping it in std::move adds nothing.
  net_ = CreateNet(*net_def, ws_.get(), device_type);
}
// Nothing to release by hand: the unique_ptr members clean up after
// themselves. The destructor is defined in this translation unit rather than
// implicitly in the header.
MaceEngine::~MaceEngine() = default;
const float *MaceEngine::Run(const float *input,
const std::vector<index_t> &input_shape,
std::vector<int64_t> &output_shape) {
Tensor *input_tensor =
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
input_tensor->Resize(input_shape);
{
Tensor::MappingGuard input_guard(input_tensor);
float *input_data = input_tensor->mutable_data<float>();
memcpy(input_data, input, input_tensor->size() * sizeof(float));
}
if(!net_->Run()) {
LOG(FATAL) << "Net run failed";
}
// save output
const Tensor *output = ws_->GetTensor("mace_output_node:0");
if (output != nullptr) {
Tensor::MappingGuard output_guard(output);
auto shape = output->shape();
output_shape.resize(shape.size());
std::copy(shape.begin(), shape.end(), output_shape.begin());
return output->data<float>();
} else {
return nullptr;
}
}
} // namespace mace } // namespace mace
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
#include <string> #include <string>
#include "mace/utils/logging.h" #include <memory>
namespace mace { namespace mace {
...@@ -111,17 +111,10 @@ class Argument { ...@@ -111,17 +111,10 @@ class Argument {
class NodeInput { class NodeInput {
public: public:
void CopyFrom(const NodeInput &from) { void CopyFrom(const NodeInput &from);
node_id_ = from.node_id();
output_port_ = from.output_port();
}
public: public:
int node_id() const { int node_id() const;
return node_id_; int output_port() const;
}
int output_port() const {
return output_port_;
}
private: private:
int node_id_; int node_id_;
int output_port_; int output_port_;
...@@ -218,15 +211,9 @@ class MemoryBlock { ...@@ -218,15 +211,9 @@ class MemoryBlock {
class MemoryArena { class MemoryArena {
public: public:
inline const std::vector<MemoryBlock> &mem_block() const { const std::vector<MemoryBlock> &mem_block() const;
return mem_block_; std::vector<MemoryBlock> &mutable_mem_block();
} int mem_block_size() const;
inline std::vector<MemoryBlock> &mutable_mem_block() {
return mem_block_;
}
inline int mem_block_size() const {
return mem_block_.size();
}
private: private:
std::vector<MemoryBlock> mem_block_; std::vector<MemoryBlock> mem_block_;
...@@ -235,21 +222,11 @@ class MemoryArena { ...@@ -235,21 +222,11 @@ class MemoryArena {
// for hexagon mace-nnlib // for hexagon mace-nnlib
class InputInfo { class InputInfo {
public: public:
const std::string &name() const { const std::string &name() const;
return name_; int32_t node_id() const;
} int32_t max_byte_size() const;
int32_t node_id() const { DataType data_type() const;
return node_id_; const std::vector<int32_t> &dims() const;
}
int32_t max_byte_size() const {
return max_byte_size_;
}
DataType data_type() const {
return data_type_;
}
const std::vector<int32_t> &dims() const {
return dims_;
}
private: private:
std::string name_; std::string name_;
int32_t node_id_; int32_t node_id_;
...@@ -260,21 +237,11 @@ class InputInfo { ...@@ -260,21 +237,11 @@ class InputInfo {
class OutputInfo { class OutputInfo {
public: public:
const std::string &name() const { const std::string &name() const;
return name_; int32_t node_id() const;
} int32_t max_byte_size() const;
int32_t node_id() const { DataType data_type() const;
return node_id_; const std::vector<int32_t> &dims() const;
}
int32_t max_byte_size() const {
return max_byte_size_;
}
DataType data_type() const {
return data_type_;
}
const std::vector<int32_t> &dims() const {
return dims_;
}
private: private:
std::string name_; std::string name_;
int32_t node_id_; int32_t node_id_;
...@@ -333,5 +300,21 @@ class NetDef { ...@@ -333,5 +300,21 @@ class NetDef {
uint32_t has_bits_; uint32_t has_bits_;
}; };
class Workspace;
class NetBase;
// Runs a model described by a NetDef on one device. The constructor prepares
// the workspace and the nets; Run() feeds one float input and hands back the
// float output.
class MaceEngine {
public:
// Loads the model tensors of `*net_def` into a new workspace, runs an
// INIT-mode net once, creates the "mace_input_node:0" input tensor and
// creates the net used by Run(). `net_def` is dereferenced, so it must not be
// null.
explicit MaceEngine(const NetDef *net_def, DeviceType device_type);
// Declared here and defined out of line: Workspace and NetBase are only
// forward-declared in this header, and destroying the unique_ptr members
// requires the complete types.
~MaceEngine();
// Copies `input` into the "mace_input_node:0" tensor (resized to
// `input_shape`), runs the net, and writes the shape of "mace_output_node:0"
// into `output_shape`.
// Returns the output tensor's float data pointer (not a copy), or nullptr if
// the workspace has no "mace_output_node:0" tensor.
const float *Run(const float *input,
const std::vector<int64_t> &input_shape,
std::vector<int64_t> &output_shape);
private:
// Device type passed to the constructor; used to pick the tensor allocator.
DeviceType device_type_;
// Workspace holding the model, input and output tensors.
std::unique_ptr<Workspace> ws_;
// Net created by the constructor and executed by Run().
std::unique_ptr<NetBase> net_;
};
} // namespace mace } // namespace mace
#endif // MACE_CORE_MACE_H_ #endif // MACE_CORE_MACE_H_
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
// //
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
namespace mace { namespace mace {
......
...@@ -6,12 +6,14 @@ ...@@ -6,12 +6,14 @@
#define MACE_CORE_NET_H_ #define MACE_CORE_NET_H_
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/core/mace.h" #include "mace/core/mace.h"
namespace mace { namespace mace {
class RunMetadata;
class OperatorBase;
class Workspace;
class NetBase { class NetBase {
public: public:
NetBase(const std::shared_ptr<const NetDef> &net_def, NetBase(const std::shared_ptr<const NetDef> &net_def,
......
...@@ -12,9 +12,14 @@ ...@@ -12,9 +12,14 @@
* --output_file=mace.out \ * --output_file=mace.out \
* --device=NEON * --device=NEON
*/ */
#include <sys/time.h>
#include <fstream> #include <fstream>
#include "mace/core/net.h" #include <numeric>
#include <iostream>
#include <cstdlib>
#include "mace/utils/command_line_flags.h" #include "mace/utils/command_line_flags.h"
#include "mace/core/mace.h"
#include "mace/utils/logging.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
using namespace std; using namespace std;
...@@ -23,7 +28,7 @@ using namespace mace; ...@@ -23,7 +28,7 @@ using namespace mace;
namespace mace { namespace mace {
extern NetDef MACE_MODEL_FUNCTION(); extern NetDef MACE_MODEL_FUNCTION();
} }
void ParseShape(const string &str, vector<index_t> *shape) { void ParseShape(const string &str, vector<int64_t> *shape) {
string tmp = str; string tmp = str;
while (!tmp.empty()) { while (!tmp.empty()) {
int dim = atoi(tmp.data()); int dim = atoi(tmp.data());
...@@ -87,86 +92,71 @@ int main(int argc, char **argv) { ...@@ -87,86 +92,71 @@ int main(int argc, char **argv) {
<< "device: " << device << std::endl << "device: " << device << std::endl
<< "round: " << round << std::endl; << "round: " << round << std::endl;
vector<index_t> shape; vector<int64_t> shape;
ParseShape(input_shape, &shape); ParseShape(input_shape, &shape);
// load model
int64_t t0 = utils::NowMicros(); int64_t t0 = utils::NowMicros();
NetDef net_def = mace::MACE_MODEL_FUNCTION(); NetDef net_def = mace::MACE_MODEL_FUNCTION();
int64_t t1 = utils::NowMicros(); int64_t t1 = utils::NowMicros();
LOG(INFO) << "CreateNetDef duration: " << t1 - t0 << "us"; LOG(INFO) << "CreateNetDef duration: " << t1 - t0 << "us";
int64_t init_micros = t1 - t0; int64_t init_micros = t1 - t0;
t0 = utils::NowMicros();
DeviceType device_type = ParseDeviceType(device); DeviceType device_type = ParseDeviceType(device);
VLOG(1) << "Device Type" << device_type; VLOG(1) << "Device Type" << device_type;
Workspace ws; int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
ws.LoadModelTensor(net_def, device_type); std::unique_ptr<float[]> input_data(new float[input_size]);
Tensor *input_tensor =
ws.CreateTensor(input_node + ":0", GetDeviceAllocator(device_type), DT_FLOAT); // load input
input_tensor->Resize(shape); ifstream in_file(input_file, ios::in | ios::binary);
t1 = utils::NowMicros(); in_file.read(reinterpret_cast<char *>(input_data.get()),
init_micros += t1 - t0; input_size * sizeof(float));
LOG(INFO) << "CreateWorkspaceTensor duration: " << t1 - t0 << "us"; in_file.close();
// Init model // Init model
VLOG(0) << "Run init"; VLOG(0) << "Run init";
t0 = utils::NowMicros(); t0 = utils::NowMicros();
auto net = CreateNet(net_def, &ws, device_type, NetMode::INIT); mace::MaceEngine engine(&net_def, device_type);
net->Run();
t1 = utils::NowMicros(); t1 = utils::NowMicros();
init_micros += t1 - t0; init_micros += t1 - t0;
LOG(INFO) << "Net init duration: " << t1 - t0 << "us"; LOG(INFO) << "Net init duration: " << t1 - t0 << "us";
// run model
t0 = utils::NowMicros();
net = CreateNet(net_def, &ws, device_type);
t1 = utils::NowMicros();
init_micros += t1 - t0;
LOG(INFO) << "Total init duration: " << init_micros << "us"; LOG(INFO) << "Total init duration: " << init_micros << "us";
{ std::vector<int64_t> output_shape;
Tensor::MappingGuard input_guard(input_tensor); VLOG(0) << "warm up";
float *input_data = input_tensor->mutable_data<float>();
// load input
ifstream in_file(input_file, ios::in | ios::binary);
in_file.read(reinterpret_cast<char *>(input_data),
input_tensor->size() * sizeof(float));
in_file.close();
}
// warm up // warm up
VLOG(0) << "Warm up"; for (int i = 0; i < 1; ++i) {
t0 = utils::NowMicros(); engine.Run(input_data.get(), shape, output_shape);
net->Run(); }
t1 = utils::NowMicros();
LOG(INFO) << "1st run duration: " << t1 - t0 << "us";
VLOG(0) << "Run"; VLOG(0) << "Run model";
t0 = utils::NowMicros(); timeval tv1, tv2;
gettimeofday(&tv1, NULL);
for (int i = 0; i < round; ++i) { for (int i = 0; i < round; ++i) {
net->Run(); engine.Run(input_data.get(), shape, output_shape);
} }
t1 = utils::NowMicros(); gettimeofday(&tv2, NULL);
LOG(INFO) << "Average duration: " << (t1 - t0) / round << "us"; std::cout << "avg duration: "
<< ((tv2.tv_sec - tv1.tv_sec) * 1000 +
// save output (tv2.tv_usec - tv1.tv_usec) / 1000) /
const Tensor *output = ws.GetTensor(output_node + ":0"); round
<< endl;
std::remove(output_file.c_str());
const float *output = engine.Run(input_data.get(), shape, output_shape);
if (output != nullptr) { if (output != nullptr) {
Tensor::MappingGuard output_guard(output);
ofstream out_file(output_file, ios::binary); ofstream out_file(output_file, ios::binary);
out_file.write((const char *)(output->data<float>()), int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>());
output->size() * sizeof(float)); out_file.write((const char *) (output),
output_size * sizeof(float));
out_file.flush(); out_file.flush();
out_file.close(); out_file.close();
stringstream ss; stringstream ss;
ss << "Output shape: ["; ss << "Output shape: [";
for (int i = 0; i < output->dim_size(); ++i) { for (auto i : output_shape) {
ss << output->dim(i) << ", "; ss << i << ", ";
} }
ss << "]"; ss << "]";
VLOG(0) << ss.str(); VLOG(0) << ss.str();
} }
} }
\ No newline at end of file
...@@ -71,7 +71,7 @@ build_and_run() ...@@ -71,7 +71,7 @@ build_and_run()
--input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \ --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
--output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \ --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
--device=OPENCL \ --device=OPENCL \
--round=1 --round=2
} }
echo "Step 1: Generate input data" echo "Step 1: Generate input data"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册