提交 650c9393 编写于 作者: L liuqi

Add MaceEngine API and refactor mace.h.

上级 6e617b6d
......@@ -4,6 +4,9 @@
#include "mace/core/mace.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
namespace mace {
......@@ -149,6 +152,17 @@ void Argument::set_strings(const std::vector<std::string> &value) {
std::copy(value.begin(), value.end(), strings_.begin());
}
// Node Input
void NodeInput::CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
}
int NodeInput::node_id() const {
return node_id_;
}
int NodeInput::output_port() const {
return output_port_;
}
// OutputShape
OutputShape::OutputShape() {}
......@@ -338,6 +352,51 @@ uint32_t MemoryBlock::y() const {
return y_;
}
// MemoryArena
// MemoryArena accessors.

// Read-only view of the arena's memory blocks.
const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
  return mem_block_;
}

// Mutable access to the arena's memory blocks.
std::vector<MemoryBlock> &MemoryArena::mutable_mem_block() {
  return mem_block_;
}

// Number of blocks in the arena.  The explicit cast makes the
// size_t -> int narrowing visible (behavior unchanged).
int MemoryArena::mem_block_size() const {
  return static_cast<int>(mem_block_.size());
}
// InputInfo
const std::string &InputInfo::name() const {
return name_;
}
int32_t InputInfo::node_id() const {
return node_id_;
}
int32_t InputInfo::max_byte_size() const {
return max_byte_size_;
}
DataType InputInfo::data_type() const {
return data_type_;
}
const std::vector<int32_t> &InputInfo::dims() const {
return dims_;
}
// OutputInfo
const std::string &OutputInfo::name() const {
return name_;
}
int32_t OutputInfo::node_id() const {
return node_id_;
}
int32_t OutputInfo::max_byte_size() const {
return max_byte_size_;
}
DataType OutputInfo::data_type() const {
return data_type_;
}
const std::vector<int32_t> &OutputInfo::dims() const {
return dims_;
}
// NetDef
NetDef::NetDef() : has_bits_(0) {}
......@@ -421,4 +480,49 @@ const OperatorDef &NetDef::op(const int idx) const {
MACE_CHECK(0 <= idx && idx < op_size());
return op_[idx];
}
// Mace Engine
MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type):
device_type_(device_type), ws_(new Workspace()), net_(nullptr) {
ws_->LoadModelTensor(*net_def, device_type);
// Init model
auto net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT);
if(!net->Run()) {
LOG(FATAL) << "Net init run failed";
}
ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
net_ = std::move(CreateNet(*net_def, ws_.get(), device_type));
}
// Out-of-line destructor: defined here (rather than in the header)
// because the header only forward-declares Workspace and NetBase, and
// the unique_ptr members need complete types at destruction time.
MaceEngine::~MaceEngine() = default;
// Runs one inference.
//
// Copies `input` (input_shape elements, float32) into the workspace
// tensor "mace_input_node:0", executes the net, and on success fills
// output_shape and returns a pointer to the output tensor's data;
// returns nullptr if "mace_output_node:0" is missing.
//
// NOTE(review): the header declares input_shape as
// std::vector<int64_t>, but this definition uses std::vector<index_t>;
// confirm index_t == int64_t or this will not link.
const float *MaceEngine::Run(const float *input,
                             const std::vector<index_t> &input_shape,
                             std::vector<int64_t> &output_shape) {
  Tensor *input_tensor =
      ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT);
  input_tensor->Resize(input_shape);
  {
    // Map the (possibly device-side) buffer only for the copy.
    Tensor::MappingGuard input_guard(input_tensor);
    float *input_data = input_tensor->mutable_data<float>();
    memcpy(input_data, input, input_tensor->size() * sizeof(float));
  }
  if(!net_->Run()) {
    LOG(FATAL) << "Net run failed";
  }
  // save output
  const Tensor *output = ws_->GetTensor("mace_output_node:0");
  if (output != nullptr) {
    Tensor::MappingGuard output_guard(output);
    auto shape = output->shape();
    output_shape.resize(shape.size());
    std::copy(shape.begin(), shape.end(), output_shape.begin());
    // NOTE(review): output_guard is destroyed when this block exits,
    // before the caller sees the pointer — confirm the mapping (and the
    // pointer) remains valid after the guard is released.
    return output->data<float>();
  } else {
    return nullptr;
  }
}
} // namespace mace
......@@ -7,7 +7,7 @@
#include <cstdint>
#include <vector>
#include <string>
#include "mace/utils/logging.h"
#include <memory>
namespace mace {
......@@ -111,17 +111,10 @@ class Argument {
class NodeInput {
public:
void CopyFrom(const NodeInput &from) {
node_id_ = from.node_id();
output_port_ = from.output_port();
}
void CopyFrom(const NodeInput &from);
public:
int node_id() const {
return node_id_;
}
int output_port() const {
return output_port_;
}
int node_id() const;
int output_port() const;
private:
int node_id_;
int output_port_;
......@@ -218,15 +211,9 @@ class MemoryBlock {
class MemoryArena {
public:
inline const std::vector<MemoryBlock> &mem_block() const {
return mem_block_;
}
inline std::vector<MemoryBlock> &mutable_mem_block() {
return mem_block_;
}
inline int mem_block_size() const {
return mem_block_.size();
}
const std::vector<MemoryBlock> &mem_block() const;
std::vector<MemoryBlock> &mutable_mem_block();
int mem_block_size() const;
private:
std::vector<MemoryBlock> mem_block_;
......@@ -235,21 +222,11 @@ class MemoryArena {
// for hexagon mace-nnlib
class InputInfo {
public:
const std::string &name() const {
return name_;
}
int32_t node_id() const {
return node_id_;
}
int32_t max_byte_size() const {
return max_byte_size_;
}
DataType data_type() const {
return data_type_;
}
const std::vector<int32_t> &dims() const {
return dims_;
}
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
const std::vector<int32_t> &dims() const;
private:
std::string name_;
int32_t node_id_;
......@@ -260,21 +237,11 @@ class InputInfo {
class OutputInfo {
public:
const std::string &name() const {
return name_;
}
int32_t node_id() const {
return node_id_;
}
int32_t max_byte_size() const {
return max_byte_size_;
}
DataType data_type() const {
return data_type_;
}
const std::vector<int32_t> &dims() const {
return dims_;
}
const std::string &name() const;
int32_t node_id() const;
int32_t max_byte_size() const;
DataType data_type() const;
const std::vector<int32_t> &dims() const;
private:
std::string name_;
int32_t node_id_;
......@@ -333,5 +300,21 @@ class NetDef {
uint32_t has_bits_;
};
// Forward declarations keep the heavy core headers out of this public
// header; MaceEngine holds both only through unique_ptr.
class Workspace;
class NetBase;

// Inference engine facade: owns the workspace and the compiled net for
// one model and runs it on the requested device.
class MaceEngine {
 public:
  // Builds the engine; loads model tensors and runs the net's INIT phase.
  explicit MaceEngine(const NetDef *net_def, DeviceType device_type);
  ~MaceEngine();
  // Runs one inference: copies `input` in, fills output_shape, and
  // returns a pointer to the output data (nullptr if no output tensor).
  // NOTE(review): the .cc definition spells input_shape as
  // std::vector<index_t> — confirm index_t is int64_t or this won't link.
  const float *Run(const float *input,
                   const std::vector<int64_t> &input_shape,
                   std::vector<int64_t> &output_shape);

 private:
  DeviceType device_type_;
  std::unique_ptr<Workspace> ws_;
  std::unique_ptr<NetBase> net_;
};
} // namespace mace
#endif // MACE_CORE_MACE_H_
......@@ -3,6 +3,8 @@
//
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/utils.h"
namespace mace {
......
......@@ -6,12 +6,14 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/core/mace.h"
namespace mace {
class RunMetadata;
class OperatorBase;
class Workspace;
class NetBase {
public:
NetBase(const std::shared_ptr<const NetDef> &net_def,
......
......@@ -39,3 +39,15 @@ cc_binary(
"//mace/utils:command_line_flags",
],
)
# End-to-end example binary: loads a code-generated model
# (MACE_MODEL_FUNCTION is injected via --copt at build time) and runs it.
cc_binary(
    name = "example",
    srcs = ["example.cc"],
    copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"],
    linkopts = ["-fopenmp"],  # link the OpenMP runtime
    linkstatic = 1,
    deps = [
        "//mace/codegen:generated_models_lib",
        "//mace/utils:command_line_flags",
    ],
)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
/**
 * Usage:
 * example --model=mobi_mace.pb \
 *         --input=input_node \
 *         --output=MobilenetV1/Logits/conv2d/convolution \
 *         --input_shape=1,3,224,224 \
 *         --input_file=input_data \
 *         --output_file=mace.out \
 *         --device=NEON
 */
#include <sys/time.h>
#include <fstream>
#include <numeric>
#include <iostream>
#include <cstdlib>
#include "mace/utils/command_line_flags.h"
#include "mace/core/mace.h"
#include "mace/utils/logging.h"
using namespace std;
using namespace mace;
namespace mace {
extern NetDef MACE_MODEL_FUNCTION();
}
// Parses a comma-separated list of integers ("1,3,224,224") into *shape.
// Existing elements of *shape are kept; parsed dims are appended.
//
// Fix: the original used atoi(), which truncates to int and cannot
// represent the full range of the int64_t element type; strtoll parses
// the whole range.
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
  std::string tmp = str;
  while (!tmp.empty()) {
    // tmp is NUL-terminated (std::string), so data() is safe for strtoll.
    int64_t dim = std::strtoll(tmp.data(), nullptr, 10);
    shape->push_back(dim);
    size_t next_offset = tmp.find(',');
    if (next_offset == std::string::npos) {
      break;
    }
    tmp = tmp.substr(next_offset + 1);
  }
}
// Maps a device name ("CPU", "NEON", "OPENCL") to its DeviceType.
// Any unrecognized name falls back to CPU, exactly like an empty flag.
DeviceType ParseDeviceType(const std::string &device_str) {
  if (device_str == "CPU") return DeviceType::CPU;
  if (device_str == "NEON") return DeviceType::NEON;
  if (device_str == "OPENCL") return DeviceType::OPENCL;
  return DeviceType::CPU;
}
int main(int argc, char **argv) {
string model_file;
string input_node;
string output_node;
string input_shape;
string input_file;
string output_file;
string device;
int round = 1;
std::vector<Flag> flag_list = {
Flag("model", &model_file, "model file name"),
Flag("input", &input_node, "input node"),
Flag("output", &output_node, "output node"),
Flag("input_shape", &input_shape, "input shape, separated by comma"),
Flag("input_file", &input_file, "input file name"),
Flag("output_file", &output_file, "output file name"),
Flag("device", &device, "CPU/NEON"),
Flag("round", &round, "round"),
};
string usage = Flags::Usage(argv[0], flag_list);
const bool parse_result = Flags::Parse(&argc, argv, flag_list);
if (!parse_result) {
LOG(ERROR) << usage;
return -1;
}
VLOG(0) << "model: " << model_file << std::endl
<< "input: " << input_node << std::endl
<< "output: " << output_node << std::endl
<< "input_shape: " << input_shape << std::endl
<< "input_file: " << input_file << std::endl
<< "output_file: " << output_file << std::endl
<< "device: " << device << std::endl
<< "round: " << round << std::endl;
vector<int64_t> shape;
ParseShape(input_shape, &shape);
// load model
NetDef net_def = mace::MACE_MODEL_FUNCTION();
DeviceType device_type = ParseDeviceType(device);
VLOG(0) << device_type;
int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
std::unique_ptr<float[]> input_data(new float[input_size]);
// load input
ifstream in_file(input_file, ios::in | ios::binary);
in_file.read(reinterpret_cast<char *>(input_data.get()),
input_size * sizeof(float));
in_file.close();
// Init model
VLOG(0) << "Run init";
mace::MaceEngine engine(&net_def, device_type);
VLOG(0) << "Run model";
std::vector<int64_t> output_shape;
timeval tv1, tv2;
gettimeofday(&tv1, NULL);
for (int i = 0; i < round; ++i) {
engine.Run(input_data.get(), shape, output_shape);
}
gettimeofday(&tv2, NULL);
std::cout << "avg duration: "
<< ((tv2.tv_sec - tv1.tv_sec) * 1000 +
(tv2.tv_usec - tv1.tv_usec) / 1000) /
round
<< endl;
const float *output = engine.Run(input_data.get(), shape, output_shape);
if (output != nullptr) {
ofstream out_file(output_file, ios::binary);
int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>());
out_file.write((const char *) (output),
output_size * sizeof(float));
out_file.flush();
out_file.close();
stringstream ss;
ss << "Output shape: [";
for (auto i : output_shape) {
ss << i << ", ";
}
ss << "]";
VLOG(0) << ss.str();
}
}
\ No newline at end of file
......@@ -14,6 +14,7 @@
*/
#include <fstream>
#include "mace/core/net.h"
#include "mace/core/workspace.h"
#include "mace/utils/command_line_flags.h"
#include "mace/utils/env_time.h"
......
#!/bin/bash
# Must run at root dir of mace project.
#
# Validates the GCN model end to end: generates input data, converts the
# TF model to generated C++ source, runs it on a connected Android
# device (twice: kernels-as-files, then embedded binaries), and compares
# the device output against TensorFlow.

set +x

# Print the expected invocation.
Usage() {
  echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size [tuning]'
}

if [ $# -lt 2 ];then
  Usage
  # Fix: exit statuses are 0-255; "exit -1" only works by wrapping to 255.
  exit 1
fi

TF_MODEL_FILE_PATH=$1
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd`
MACE_MODEL_NAME='mace_model.pb'
INPUT_FILE_NAME='model_input'
OUTPUT_FILE_NAME='gcn.out'
OUTPUT_LIST_FILE='gcn.list'
PHONE_DATA_DIR="/data/local/tmp/${MACE_MODEL_NAME}"
KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
IMAGE_SIZE=$2
MODEL_TAG=GCN${IMAGE_SIZE}
CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE
CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl
CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin
TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning
# Optional third argument enables on-device tuning (default: off).
TUNING_OR_NOT=${3:-0}
# Builds the example binary and runs it on the attached device via adb.
#
# $1  EMBED_OPENCL_BINARY: "true" compiles OpenCL programs into the
#     executable; "false" pushes the .cl sources to the device instead.
# $2  (optional) number of OpenMP threads, default 4.
build_and_run()
{
  EMBED_OPENCL_BINARY=$1
  if [ "$EMBED_OPENCL_BINARY" = true ]; then
    EMBED_OPENCL_BINARY_BUILD_FLAGS="--define embed_binary_program=true"
  fi

  bazel build -c opt --strip always mace/examples:example \
    --crosstool_top=//external:android/crosstool \
    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
    --cpu=arm64-v8a \
    $EMBED_OPENCL_BINARY_BUILD_FLAGS \
    --copt=-DMACE_MODEL_FUNCTION=Create${MODEL_TAG}

  adb shell "mkdir -p ${PHONE_DATA_DIR}"
  if [ "$EMBED_OPENCL_BINARY" = false ]; then
    # Kernels are loaded from files on-device; push a fresh copy.
    adb shell "rm -rf ${KERNEL_DIR}"
    adb shell "mkdir -p ${KERNEL_DIR}"
    adb push mace/kernels/opencl/cl/. ${KERNEL_DIR}
  fi
  adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR}
  adb push bazel-bin/mace/examples/example ${PHONE_DATA_DIR}

  # Fix: the original read ${1:-4}, but inside the function $1 is
  # EMBED_OPENCL_BINARY ("true"/"false"), so OMP_NUM_THREADS was set to
  # a boolean string.  Use the optional second argument instead.
  num_threads=${2:-4}
  if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then
    tuning_flag=1
  else
    tuning_flag=0
  fi

  adb </dev/null shell MACE_TUNING=${tuning_flag} \
    MACE_CPP_MIN_VLOG_LEVEL=0 \
    MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
    MACE_KERNEL_PATH=$KERNEL_DIR \
    OMP_NUM_THREADS=$num_threads \
    ${PHONE_DATA_DIR}/example \
    --model=${PHONE_DATA_DIR}/${MACE_MODEL_NAME} \
    --input=mace_input_node \
    --output=mace_output_node \
    --input_shape="1,${IMAGE_SIZE},${IMAGE_SIZE},3" \
    --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} \
    --output_file=${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} \
    --device=OPENCL \
    --round=2
}
echo "Step 1: Generate input data"
# Deterministic random input (fixed seed) so runs are comparable.
python tools/validate.py --generate_data true --random_seed 1 \
  --input_file=${MODEL_DIR}/${INPUT_FILE_NAME} \
  --input_shape="${IMAGE_SIZE},${IMAGE_SIZE},3"

echo "Step 2: Convert tf model to mace model and optimize memory"
bazel build //mace/python/tools:tf_converter
rm -rf ${CODEGEN_DIR}/models
mkdir -p ${MODEL_CODEGEN_DIR}
# Emits C++ source for the model (--output_type=source) with half
# precision weights (--data_type=DT_HALF) for the GPU runtime.
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
  --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
  --input_node=input \
  --output_node=GCN/br_result_2/fcn_br \
  --data_type=DT_HALF \
  --runtime=gpu \
  --output_type=source \
  --template=${MACE_SOURCE_DIR}/mace/python/tools/model.template \
  --model_tag=${MODEL_TAG} \
  --confuse=True

echo "Step 3: Run model on the phone with files"
# First pass: OpenCL kernel sources are pushed to the device.
build_and_run false

echo "Step 4: Generate OpenCL binary program and config code"
# Pull the device-compiled kernels back and turn them into C++ source.
rm -rf ${CL_BIN_DIR}
adb pull ${KERNEL_DIR} ${CL_BIN_DIR}
rm -rf ${CL_CODEGEN_DIR}
mkdir -p ${CL_CODEGEN_DIR}
python mace/python/tools/opencl_codegen.py \
  --cl_binary_dir=${CL_BIN_DIR} --output_path=${CL_CODEGEN_DIR}/opencl_compiled_program.cc

echo "Step 5: Generate tuning source file"
adb pull ${PHONE_DATA_DIR}/mace_run.config ${CL_BIN_DIR}
mkdir -p ${TUNING_CODEGEN_DIR}
python mace/python/tools/binary_codegen.py \
  --binary_file=${CL_BIN_DIR}/mace_run.config --output_path=${TUNING_CODEGEN_DIR}/tuning_params.cc

echo "Step 6: Run model on the phone using binary"
# Second pass: the kernels generated in Step 4 are embedded in the binary.
build_and_run true

echo "Step 7: Pull the mace run result."
rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}

echo "Step 8: Validate the result"
# Compare the device output with TensorFlow's for the same input.
python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
  --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
  --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
  --input_node input \
  --output_node GCN/br_result_2/fcn_br \
  --input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
  --output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册