Merge branch 'tuning-binary' into 'master'

Add MaceEngine API See merge request !173

Merge branch 'tuning-binary' into 'master'
Add MaceEngine API See merge request !173
efe8ac50 · Liangliang He · ad9642ef · 6d89ad77 · efe8ac50 · efe8ac50
6 changed files
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -4,6 +4,9 @@
 #include "mace/core/mace.h"
 #include "mace/core/types.h"
+#include "mace/core/net.h"
+#include "mace/core/workspace.h"
+#include "mace/utils/logging.h"
 namespace mace {
@@ -149,6 +152,17 @@ void Argument::set_strings(const std::vector<std::string> &value) {
  std::copy(value.begin(), value.end(), strings_.begin());
 }
+// Node Input
+// Copies both fields (node id and output port) from `from` into this object.
+void NodeInput::CopyFrom(const NodeInput &from) {
+  node_id_ = from.node_id();
+  output_port_ = from.output_port();
+}
+// Returns the stored node id.
+int NodeInput::node_id() const {
+  return node_id_;
+}
+// Returns the stored output port.
+int NodeInput::output_port() const {
+  return output_port_;
+}
 // OutputShape
 OutputShape::OutputShape() {}
@@ -338,6 +352,51 @@ uint32_t MemoryBlock::y() const {
  return y_;
 }
+// MemoryArena
+// Read-only access to the memory blocks held by this arena.
+const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
+  return mem_block_;
+}
+// Mutable access to the same memory block list, so callers can edit it in place.
+std::vector<MemoryBlock> &MemoryArena::mutable_mem_block() {
+  return mem_block_;
+}
+// Number of memory blocks. std::vector::size() yields size_t, so the
+// narrowing to the int return type is spelled out instead of left implicit.
+int MemoryArena::mem_block_size() const {
+  return static_cast<int>(mem_block_.size());
+}
+// InputInfo
+// Plain read accessors; each one returns the corresponding stored member.
+// Returns the stored name.
+const std::string &InputInfo::name() const {
+  return name_;
+}
+// Returns the stored node id.
+int32_t InputInfo::node_id() const {
+  return node_id_;
+}
+// Returns the stored maximum byte size.
+int32_t InputInfo::max_byte_size() const {
+  return max_byte_size_;
+}
+// Returns the stored data type.
+DataType InputInfo::data_type() const {
+  return data_type_;
+}
+// Returns the stored dimensions by const reference (no copy).
+const std::vector<int32_t> &InputInfo::dims() const {
+  return dims_;
+}
+// OutputInfo
+// Plain read accessors; each one returns the corresponding stored member.
+// Returns the stored name.
+const std::string &OutputInfo::name() const {
+  return name_;
+}
+// Returns the stored node id.
+int32_t OutputInfo::node_id() const {
+  return node_id_;
+}
+// Returns the stored maximum byte size.
+int32_t OutputInfo::max_byte_size() const {
+  return max_byte_size_;
+}
+// Returns the stored data type.
+DataType OutputInfo::data_type() const {
+  return data_type_;
+}
+// Returns the stored dimensions by const reference (no copy).
+const std::vector<int32_t> &OutputInfo::dims() const {
+  return dims_;
+}
 // NetDef
 NetDef::NetDef() : has_bits_(0) {}
@@ -421,4 +480,49 @@ const OperatorDef &NetDef::op(const int idx) const {
  MACE_CHECK(0 <= idx && idx < op_size());
  return op_[idx];
 }
+// Mace Engine
+// Builds an engine for `net_def` on `device_type`:
+//   1. loads the model tensors into a freshly created Workspace,
+//   2. creates a net in NetMode::INIT and runs it once (FATAL log on failure),
+//   3. creates the float input tensor "mace_input_node:0",
+//   4. creates the net that Run() executes afterwards.
+// `net_def` must not be null.
+MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type):
+    device_type_(device_type), ws_(new Workspace()), net_(nullptr) {
+  // Fail loudly instead of dereferencing a null pointer below.
+  MACE_CHECK(net_def != nullptr);
+  ws_->LoadModelTensor(*net_def, device_type);
+  // Init model
+  auto init_net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT);
+  if (!init_net->Run()) {
+    LOG(FATAL) << "Net init run failed";
+  }
+  ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
+  // The call result is already a temporary, so it is assigned directly;
+  // wrapping it in std::move added nothing.
+  net_ = CreateNet(*net_def, ws_.get(), device_type);
+}
+// Kept out of line: mace.h only forward-declares Workspace and NetBase, while
+// this file includes mace/core/workspace.h and mace/core/net.h, so the
+// unique_ptr members are destroyed here.
+MaceEngine::~MaceEngine() = default;
+// Runs the net on one float input and returns the float output.
+//
+// input:        source floats; as many elements as the input tensor holds
+//               after being resized to input_shape are copied from it.
+// input_shape:  shape the "mace_input_node:0" tensor is resized to. Spelled
+//               int64_t to match the declaration in mace.h.
+// output_shape: overwritten with the shape of "mace_output_node:0" when that
+//               tensor is found; left untouched otherwise.
+//
+// Returns the output tensor's float data, or nullptr when the workspace has no
+// "mace_output_node:0" tensor. A failed net run is logged as FATAL.
+//
+// NOTE(review): output_guard is destroyed when this function returns, so
+// callers read the returned pointer after the guard is gone. Whether that is
+// safe depends on what Tensor::MappingGuard releases - confirm, especially for
+// non-CPU devices.
+const float *MaceEngine::Run(const float *input,
+                             const std::vector<int64_t> &input_shape,
+                             std::vector<int64_t> &output_shape) {
+  Tensor *input_tensor =
+      ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
+  input_tensor->Resize(input_shape);
+  {
+    Tensor::MappingGuard input_guard(input_tensor);
+    float *input_data = input_tensor->mutable_data<float>();
+    const auto input_size = input_tensor->size();
+    // A null source is only acceptable when there is nothing to copy.
+    MACE_CHECK(input != nullptr || input_size == 0);
+    // std::copy (already used elsewhere in this file) replaces memcpy, which
+    // was called without <cstring> being included here.
+    std::copy(input, input + input_size, input_data);
+  }
+  if (!net_->Run()) {
+    LOG(FATAL) << "Net run failed";
+  }
+  // save output
+  const Tensor *output = ws_->GetTensor("mace_output_node:0");
+  if (output == nullptr) {
+    return nullptr;
+  }
+  Tensor::MappingGuard output_guard(output);
+  const auto &shape = output->shape();
+  output_shape.assign(shape.begin(), shape.end());
+  return output->data<float>();
+}
 } //  namespace mace
--- a/mace/core/mace.h
+++ b/mace/core/mace.h
@@ -7,7 +7,7 @@
 #include <cstdint>
 #include <vector>
 #include <string>
-#include "mace/utils/logging.h"
+#include <memory>
 namespace mace {
@@ -111,17 +111,10 @@ class Argument {
 class NodeInput {
 public:
-  void CopyFrom(const NodeInput &from) {
+  void CopyFrom(const NodeInput &from);
-    node_id_ = from.node_id();
-    output_port_ = from.output_port();
-  }
 public:
-  int node_id() const {
+  int node_id() const;
-    return node_id_;
+  int output_port() const;
-  }
-  int output_port() const {
-    return output_port_;
-  }
 private:
  int node_id_;
  int output_port_;
@@ -218,15 +211,9 @@ class MemoryBlock {
 class MemoryArena {
 public:
-  inline const std::vector<MemoryBlock> &mem_block() const {
+  const std::vector<MemoryBlock> &mem_block() const;
-    return mem_block_;
+  std::vector<MemoryBlock> &mutable_mem_block();
-  }
+  int mem_block_size() const;
-  inline std::vector<MemoryBlock> &mutable_mem_block() {
-    return mem_block_;
-  }
-  inline int mem_block_size() const {
-    return mem_block_.size();
-  }
 private:
  std::vector<MemoryBlock> mem_block_;
@@ -235,21 +222,11 @@ class MemoryArena {
 // for hexagon mace-nnlib
 class InputInfo {
 public:
-  const std::string &name() const {
+  const std::string &name() const;
-    return name_;
+  int32_t node_id() const;
-  }
+  int32_t max_byte_size() const;
-  int32_t node_id() const {
+  DataType data_type() const;
-    return node_id_;
+  const std::vector<int32_t> &dims() const;
-  }
-  int32_t max_byte_size() const {
-    return max_byte_size_;
-  }
-  DataType data_type() const {
-    return data_type_;
-  }
-  const std::vector<int32_t> &dims() const {
-    return dims_;
-  }
 private:
  std::string name_;
  int32_t node_id_;
@@ -260,21 +237,11 @@ class InputInfo {
 class OutputInfo {
 public:
-  const std::string &name() const {
+  const std::string &name() const;
-    return name_;
+  int32_t node_id() const;
-  }
+  int32_t max_byte_size() const;
-  int32_t node_id() const {
+  DataType data_type() const;
-    return node_id_;
+  const std::vector<int32_t> &dims() const;
-  }
-  int32_t max_byte_size() const {
-    return max_byte_size_;
-  }
-  DataType data_type() const {
-    return data_type_;
-  }
-  const std::vector<int32_t> &dims() const {
-    return dims_;
-  }
 private:
  std::string name_;
  int32_t node_id_;
@@ -333,5 +300,21 @@ class NetDef {
  uint32_t has_bits_;
 };
+class Workspace;
+class NetBase;
+// Runs a model described by a NetDef on one device. Owns the Workspace and
+// the net created from the NetDef (both held through unique_ptr).
+class MaceEngine {
+ public:
+  // Loads the model tensors from net_def and prepares the net for
+  // device_type; the implementation is in mace.cc.
+  explicit MaceEngine(const NetDef *net_def, DeviceType device_type);
+  ~MaceEngine();
+  // Copies `input` into the input tensor (resized to input_shape), runs the
+  // net, writes the output tensor's shape to output_shape and returns its
+  // float data. Returns nullptr when the output tensor is not found.
+  const float *Run(const float *input,
+                   const std::vector<int64_t> &input_shape,
+                   std::vector<int64_t> &output_shape);
+ private:
+  DeviceType device_type_;  // device passed to the constructor
+  std::unique_ptr<Workspace> ws_;  // Workspace is only forward-declared in this header
+  std::unique_ptr<NetBase> net_;   // NetBase is only forward-declared in this header
+};
 } //  namespace mace
 #endif //  MACE_CORE_MACE_H_
--- a/mace/core/net.cc
+++ b/mace/core/net.cc
@@ -3,6 +3,8 @@
 //
 #include "mace/core/net.h"
+#include "mace/core/operator.h"
+#include "mace/core/workspace.h"
 #include "mace/utils/utils.h"
 namespace mace {

--- a/mace/core/net.h
+++ b/mace/core/net.h
@@ -6,12 +6,14 @@
 #define MACE_CORE_NET_H_
 #include "mace/core/common.h"
-#include "mace/core/operator.h"
-#include "mace/core/workspace.h"
 #include "mace/core/mace.h"
 namespace mace {
+class RunMetadata;
+class OperatorBase;
+class Workspace;
 class NetBase {
 public:
  NetBase(const std::shared_ptr<const NetDef> &net_def,

--- a/mace/examples/mace_run.cc
+++ b/mace/examples/mace_run.cc
@@ -12,9 +12,14 @@
 *          --output_file=mace.out  \
 *          --device=NEON
 */
+#include <sys/time.h>
 #include <fstream>
-#include "mace/core/net.h"
+#include <numeric>
+#include <iostream>
+#include <cstdlib>
 #include "mace/utils/command_line_flags.h"
+#include "mace/core/mace.h"
+#include "mace/utils/logging.h"
 #include "mace/utils/env_time.h"
 using namespace std;
@@ -23,7 +28,7 @@ using namespace mace;
 namespace mace {
 extern NetDef MACE_MODEL_FUNCTION();
 }
-void ParseShape(const string &str, vector<index_t> *shape) {
+void ParseShape(const string &str, vector<int64_t> *shape) {
  string tmp = str;
  while (!tmp.empty()) {
    int dim = atoi(tmp.data());
@@ -87,86 +92,71 @@ int main(int argc, char **argv) {
          << "device: " << device << std::endl
          << "round: " << round << std::endl;
-  vector<index_t> shape;
+  vector<int64_t> shape;
  ParseShape(input_shape, &shape);
+  // load model
  int64_t t0 = utils::NowMicros();
  NetDef net_def = mace::MACE_MODEL_FUNCTION();
  int64_t t1 = utils::NowMicros();
  LOG(INFO) << "CreateNetDef duration: " << t1 - t0 << "us";
  int64_t init_micros = t1 - t0;
-  t0 = utils::NowMicros();
  DeviceType device_type = ParseDeviceType(device);
  VLOG(1) << "Device Type" << device_type;
-  Workspace ws;
+  int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
-  ws.LoadModelTensor(net_def, device_type);
+  std::unique_ptr<float[]> input_data(new float[input_size]);
-  Tensor *input_tensor =
-      ws.CreateTensor(input_node + ":0", GetDeviceAllocator(device_type), DT_FLOAT);
+  // load input
-  input_tensor->Resize(shape);
+  ifstream in_file(input_file, ios::in | ios::binary);
-  t1 = utils::NowMicros();
+  in_file.read(reinterpret_cast<char *>(input_data.get()),
-  init_micros += t1 - t0;
+               input_size * sizeof(float));
-  LOG(INFO) << "CreateWorkspaceTensor duration: " << t1 - t0 << "us";
+  in_file.close();
  // Init model
  VLOG(0) << "Run init";
  t0 = utils::NowMicros();
-  auto net = CreateNet(net_def, &ws, device_type, NetMode::INIT);
+  mace::MaceEngine engine(&net_def, device_type);
-  net->Run();
  t1 = utils::NowMicros();
  init_micros += t1 - t0;
  LOG(INFO) << "Net init duration: " << t1 - t0 << "us";
-  // run model
-  t0 = utils::NowMicros();
-  net = CreateNet(net_def, &ws, device_type);
-  t1 = utils::NowMicros();
-  init_micros += t1 - t0;
  LOG(INFO) << "Total init duration: " << init_micros << "us";
-  {
+  std::vector<int64_t> output_shape;
-    Tensor::MappingGuard input_guard(input_tensor);
+  VLOG(0) << "warm up";
-    float *input_data = input_tensor->mutable_data<float>();
-    // load input
-    ifstream in_file(input_file, ios::in | ios::binary);
-    in_file.read(reinterpret_cast<char *>(input_data),
-                 input_tensor->size() * sizeof(float));
-    in_file.close();
-  }
  // warm up
-  VLOG(0) << "Warm up";
+  for (int i = 0; i < 1; ++i) {
-  t0 = utils::NowMicros();
+    engine.Run(input_data.get(), shape, output_shape);
-  net->Run();
+  }
-  t1 = utils::NowMicros();
-  LOG(INFO) << "1st run duration: " << t1 - t0 << "us";
-  VLOG(0) << "Run";
+  VLOG(0) << "Run model";
-  t0 = utils::NowMicros();
+  timeval tv1, tv2;
+  gettimeofday(&tv1, NULL);
  for (int i = 0; i < round; ++i) {
-    net->Run();
+    engine.Run(input_data.get(), shape, output_shape);
  }
-  t1 = utils::NowMicros();
+  gettimeofday(&tv2, NULL);
-  LOG(INFO) << "Average duration: " << (t1 - t0) / round << "us";
+  std::cout << "avg duration: "
+       << ((tv2.tv_sec - tv1.tv_sec) * 1000 +
-  // save output
+           (tv2.tv_usec - tv1.tv_usec) / 1000) /
-  const Tensor *output = ws.GetTensor(output_node + ":0");
+              round
+       << endl;
-  std::remove(output_file.c_str());
+  const float *output = engine.Run(input_data.get(), shape, output_shape);
  if (output != nullptr) {
-    Tensor::MappingGuard output_guard(output);
    ofstream out_file(output_file, ios::binary);
-    out_file.write((const char *)(output->data<float>()),
+    int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>());
-                   output->size() * sizeof(float));
+    out_file.write((const char *) (output),
+                   output_size * sizeof(float));
    out_file.flush();
    out_file.close();
    stringstream ss;
    ss << "Output shape: [";
-    for (int i = 0; i < output->dim_size(); ++i) {
+    for (auto i : output_shape) {
-      ss << output->dim(i) << ", ";
+      ss << i << ", ";
    }
    ss << "]";
    VLOG(0) << ss.str();
  }
 }
\ No newline at end of file
--- a/tools/validate_gcn.sh
+++ b/tools/validate_gcn.sh
@@ -71,7 +71,7 @@ build_and_run()
    --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
    --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
    --device=OPENCL   \
-    --round=1
+    --round=2
 }
 echo "Step 1: Generate input data"