Add memory allocation logging and checking

44544afc · Liangliang He · ee725558 · 44544afc · 44544afc · 44544afc
6 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,8 @@ tags
 .idea/
 cmake-build-debug/
 *.pyc
+mace/codegen/models/
+mace/codegen/opencl/
+mace/codegen/opencl_bin/
+mace/codegen/tuning/
+mace/codegen/version/
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -53,6 +53,7 @@ class CPUAllocator : public Allocator {
 public:
  ~CPUAllocator() override {}
  void *New(size_t nbytes) override {
+    VLOG(3) << "Allocate CPU buffer: " << nbytes;
    void *data = nullptr;
 #ifdef __ANDROID__
    data = memalign(kMaceAlignment, nbytes);
@@ -67,11 +68,18 @@ class CPUAllocator : public Allocator {
  void *NewImage(const std::vector<size_t> &shape,
                 const DataType dt) override {
+    LOG(FATAL) << "Allocate CPU image";
    return nullptr;
  }
-  void Delete(void *data) override { free(data); }
+  void Delete(void *data) override {
-  void DeleteImage(void *data) override { free(data); };
+    VLOG(3) << "Free CPU buffer";
+    free(data);
+  }
+  void DeleteImage(void *data) override {
+    LOG(FATAL) << "Free CPU image";
+    free(data);
+  };
  void *Map(void *buffer, size_t nbytes) override { return buffer; }
  void *MapImage(void *buffer,
                 const std::vector<size_t> &image_shape,

--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -36,6 +36,7 @@ OpenCLAllocator::OpenCLAllocator() {}
 OpenCLAllocator::~OpenCLAllocator() {}
 void *OpenCLAllocator::New(size_t nbytes) {
+  VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
  cl_int error;
  cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
                                      CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
@@ -48,6 +49,7 @@ void *OpenCLAllocator::New(size_t nbytes) {
 void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
                                const DataType dt) {
  MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2";
+  VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1];
  cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
@@ -64,6 +66,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
 }
 void OpenCLAllocator::Delete(void *buffer) {
+  VLOG(3) << "Free OpenCL buffer";
  if (buffer != nullptr) {
    cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
    delete cl_buffer;
@@ -71,6 +74,7 @@ void OpenCLAllocator::Delete(void *buffer) {
 }
 void OpenCLAllocator::DeleteImage(void *buffer) {
+  VLOG(3) << "Free OpenCL image";
  if (buffer != nullptr) {
    cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
    delete cl_image;

--- a/mace/core/runtime/opencl/opencl_wrapper.cc
+++ b/mace/core/runtime/opencl/opencl_wrapper.cc
@@ -337,13 +337,13 @@ OpenCLLibraryImpl *openclLibraryImpl = nullptr;
 }  // namespace
 void LoadOpenCLLibrary() {
-  if (openclLibraryImpl == nullptr) {
+  MACE_CHECK(openclLibraryImpl == nullptr);
-    openclLibraryImpl = new OpenCLLibraryImpl();
+  openclLibraryImpl = new OpenCLLibraryImpl();
-    MACE_CHECK(openclLibraryImpl->Load());
+  MACE_CHECK(openclLibraryImpl->Load());
-  }
 }
 void UnloadOpenCLLibrary() {
+  MACE_CHECK_NOTNULL(openclLibraryImpl);
  openclLibraryImpl->Unload();
  delete openclLibraryImpl;
  openclLibraryImpl = nullptr;

--- a/mace/examples/mace_run.cc
+++ b/mace/examples/mace_run.cc
@@ -12,13 +12,14 @@
 *          --output_file=mace.out  \
 *          --device=NEON
 */
+#include <malloc.h>
+#include <cstdlib>
 #include <fstream>
-#include <numeric>
 #include <iostream>
-#include <cstdlib>
+#include <numeric>
 #include "mace/utils/command_line_flags.h"
-#include "mace/utils/logging.h"
 #include "mace/utils/env_time.h"
+#include "mace/utils/logging.h"
 #include "mace/core/public/mace.h"
 #include "mace/core/public/version.h"
@@ -44,7 +45,7 @@ void ParseShape(const string &str, vector<int64_t> *shape) {
 }
 DeviceType ParseDeviceType(const string &device_str) {
-  if(device_str.compare("CPU") == 0) {
+  if (device_str.compare("CPU") == 0) {
    return DeviceType::CPU;
  } else if (device_str.compare("NEON") == 0) {
    return DeviceType::NEON;
@@ -55,6 +56,53 @@ DeviceType ParseDeviceType(const string &device_str) {
  }
 }
+struct mallinfo LogMallinfoChange(struct mallinfo prev) {  // log each mallinfo field that differs from `prev`
+  struct mallinfo curr = mallinfo();  // fresh snapshot of the allocator statistics
+  if (prev.arena != curr.arena) {  // fields that did not change are skipped, so only differences are logged
+    LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
+              << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);  // both sides widened to int64_t before subtracting
+  }
+  if (prev.ordblks != curr.ordblks) {
+    LOG(INFO) << "Number of free chunks: " << curr.ordblks
+              << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
+  }
+  if (prev.smblks != curr.smblks) {
+    LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
+              << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
+  }
+  if (prev.hblks != curr.hblks) {
+    LOG(INFO) << "Number of mmapped regions: " << curr.hblks
+              << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
+  }
+  if (prev.hblkhd != curr.hblkhd) {
+    LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
+              << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
+  }
+  if (prev.usmblks != curr.usmblks) {
+    LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
+              << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
+  }
+  if (prev.fsmblks != curr.fsmblks) {
+    LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
+              << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
+  }
+  if (prev.uordblks != curr.uordblks) {
+    LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
+              << ", diff: "
+              << ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
+  }
+  if (prev.fordblks != curr.fordblks) {
+    LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
+              << ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
+  }
+  if (prev.keepcost != curr.keepcost) {
+    LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
+              << ", diff: "
+              << ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
+  }
+  return curr;  // returned so the caller can pass it back in as `prev` on the next check
+}
 int main(int argc, char **argv) {
  string model_file;
  string input_node;
@@ -64,6 +112,7 @@ int main(int argc, char **argv) {
  string output_file;
  string device;
  int round = 1;
+  int malloc_check_cycle = -1;
  std::vector<Flag> flag_list = {
      Flag("model", &model_file, "model file name"),
@@ -74,6 +123,8 @@ int main(int argc, char **argv) {
      Flag("output_file", &output_file, "output file name"),
      Flag("device", &device, "CPU/NEON"),
      Flag("round", &round, "round"),
+      Flag("malloc_check_cycle", &malloc_check_cycle,
+           "malloc debug check cycle, -1 to disable"),
  };
  string usage = Flags::Usage(argv[0], flag_list);
@@ -107,7 +158,8 @@ int main(int argc, char **argv) {
  DeviceType device_type = ParseDeviceType(device);
  VLOG(1) << "Device Type" << device_type;
-  int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
+  int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1,
+                                       std::multiplies<int64_t>());
  std::unique_ptr<float[]> input_data(new float[input_size]);
  // load input
@@ -136,8 +188,13 @@ int main(int argc, char **argv) {
  if (round > 0) {
    VLOG(0) << "Run model";
    t0 = utils::NowMicros();
+    struct mallinfo prev = mallinfo();
    for (int i = 0; i < round; ++i) {
      engine.Run(input_data.get(), shape, output_shape);
+      if (malloc_check_cycle >= 1 && i % malloc_check_cycle == 0) {
+        LOG(INFO) << "=== check malloc info change #" << i << " ===";
+        prev = LogMallinfoChange(prev);
+      }
    }
    t1 = utils::NowMicros();
    LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us";
@@ -146,9 +203,10 @@ int main(int argc, char **argv) {
  const float *output = engine.Run(input_data.get(), shape, output_shape);
  if (output != nullptr) {
    ofstream out_file(output_file, ios::binary);
-    int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>());
+    int64_t output_size =
-    out_file.write((const char *) (output),
+        std::accumulate(output_shape.begin(), output_shape.end(), 1,
-                   output_size * sizeof(float));
+                        std::multiplies<int64_t>());
+    out_file.write((const char *)(output), output_size * sizeof(float));
    out_file.flush();
    out_file.close();
    stringstream ss;

--- a/tools/validate_gcn.sh
+++ b/tools/validate_gcn.sh
@@ -10,6 +10,7 @@ if [ $# -lt 2 ];then
  exit -1
 fi
+VLOG_LEVEL=0
 TF_MODEL_FILE_PATH=$1
 MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
 MACE_SOURCE_DIR=`/bin/pwd`
@@ -60,7 +61,7 @@ build_and_run()
  fi
  adb </dev/null shell MACE_TUNING=${tuning_flag} \
-    MACE_CPP_MIN_VLOG_LEVEL=0 \
+    MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
    MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
    MACE_KERNEL_PATH=$KERNEL_DIR \
    ${PHONE_DATA_DIR}/mace_run \
@@ -81,7 +82,7 @@ python tools/validate.py --generate_data true --random_seed 1 \
 echo "Step 2: Convert tf model to mace model and optimize memory"
 bazel build //mace/python/tools:tf_converter
-rm -rf ${CODEGEN_DIR}/models
+rm -rf ${MODEL_CODEGEN_DIR}
 mkdir -p ${MODEL_CODEGEN_DIR}
 bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
                                         --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \