提交 44544afc 编写于 作者: L Liangliang He

Add memory allocation logging and checking

上级 ee725558
...@@ -3,3 +3,8 @@ tags ...@@ -3,3 +3,8 @@ tags
.idea/ .idea/
cmake-build-debug/ cmake-build-debug/
*.pyc *.pyc
mace/codegen/models/
mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/tuning/
mace/codegen/version/
...@@ -53,6 +53,7 @@ class CPUAllocator : public Allocator { ...@@ -53,6 +53,7 @@ class CPUAllocator : public Allocator {
public: public:
~CPUAllocator() override {} ~CPUAllocator() override {}
void *New(size_t nbytes) override { void *New(size_t nbytes) override {
VLOG(3) << "Allocate CPU buffer: " << nbytes;
void *data = nullptr; void *data = nullptr;
#ifdef __ANDROID__ #ifdef __ANDROID__
data = memalign(kMaceAlignment, nbytes); data = memalign(kMaceAlignment, nbytes);
...@@ -67,11 +68,18 @@ class CPUAllocator : public Allocator { ...@@ -67,11 +68,18 @@ class CPUAllocator : public Allocator {
void *NewImage(const std::vector<size_t> &shape, void *NewImage(const std::vector<size_t> &shape,
const DataType dt) override { const DataType dt) override {
LOG(FATAL) << "Allocate CPU image";
return nullptr; return nullptr;
} }
void Delete(void *data) override { free(data); } void Delete(void *data) override {
void DeleteImage(void *data) override { free(data); }; VLOG(3) << "Free CPU buffer";
free(data);
}
void DeleteImage(void *data) override {
LOG(FATAL) << "Free CPU image";
free(data);
};
void *Map(void *buffer, size_t nbytes) override { return buffer; } void *Map(void *buffer, size_t nbytes) override { return buffer; }
void *MapImage(void *buffer, void *MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
......
...@@ -36,6 +36,7 @@ OpenCLAllocator::OpenCLAllocator() {} ...@@ -36,6 +36,7 @@ OpenCLAllocator::OpenCLAllocator() {}
OpenCLAllocator::~OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {}
void *OpenCLAllocator::New(size_t nbytes) { void *OpenCLAllocator::New(size_t nbytes) {
VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
cl_int error; cl_int error;
cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
...@@ -48,6 +49,7 @@ void *OpenCLAllocator::New(size_t nbytes) { ...@@ -48,6 +49,7 @@ void *OpenCLAllocator::New(size_t nbytes) {
void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
const DataType dt) { const DataType dt) {
MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2";
VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1];
cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt)); cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
...@@ -64,6 +66,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, ...@@ -64,6 +66,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
} }
void OpenCLAllocator::Delete(void *buffer) { void OpenCLAllocator::Delete(void *buffer) {
VLOG(3) << "Free OpenCL buffer";
if (buffer != nullptr) { if (buffer != nullptr) {
cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer); cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
delete cl_buffer; delete cl_buffer;
...@@ -71,6 +74,7 @@ void OpenCLAllocator::Delete(void *buffer) { ...@@ -71,6 +74,7 @@ void OpenCLAllocator::Delete(void *buffer) {
} }
void OpenCLAllocator::DeleteImage(void *buffer) { void OpenCLAllocator::DeleteImage(void *buffer) {
VLOG(3) << "Free OpenCL image";
if (buffer != nullptr) { if (buffer != nullptr) {
cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer); cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
delete cl_image; delete cl_image;
......
...@@ -337,13 +337,13 @@ OpenCLLibraryImpl *openclLibraryImpl = nullptr; ...@@ -337,13 +337,13 @@ OpenCLLibraryImpl *openclLibraryImpl = nullptr;
} // namespace } // namespace
void LoadOpenCLLibrary() { void LoadOpenCLLibrary() {
if (openclLibraryImpl == nullptr) { MACE_CHECK(openclLibraryImpl == nullptr);
openclLibraryImpl = new OpenCLLibraryImpl(); openclLibraryImpl = new OpenCLLibraryImpl();
MACE_CHECK(openclLibraryImpl->Load()); MACE_CHECK(openclLibraryImpl->Load());
}
} }
void UnloadOpenCLLibrary() { void UnloadOpenCLLibrary() {
MACE_CHECK_NOTNULL(openclLibraryImpl);
openclLibraryImpl->Unload(); openclLibraryImpl->Unload();
delete openclLibraryImpl; delete openclLibraryImpl;
openclLibraryImpl = nullptr; openclLibraryImpl = nullptr;
......
...@@ -12,13 +12,14 @@ ...@@ -12,13 +12,14 @@
* --output_file=mace.out \ * --output_file=mace.out \
* --device=NEON * --device=NEON
*/ */
#include <malloc.h>
#include <cstdlib>
#include <fstream> #include <fstream>
#include <numeric>
#include <iostream> #include <iostream>
#include <cstdlib> #include <numeric>
#include "mace/utils/command_line_flags.h" #include "mace/utils/command_line_flags.h"
#include "mace/utils/logging.h"
#include "mace/utils/env_time.h" #include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
#include "mace/core/public/version.h" #include "mace/core/public/version.h"
...@@ -44,7 +45,7 @@ void ParseShape(const string &str, vector<int64_t> *shape) { ...@@ -44,7 +45,7 @@ void ParseShape(const string &str, vector<int64_t> *shape) {
} }
DeviceType ParseDeviceType(const string &device_str) { DeviceType ParseDeviceType(const string &device_str) {
if(device_str.compare("CPU") == 0) { if (device_str.compare("CPU") == 0) {
return DeviceType::CPU; return DeviceType::CPU;
} else if (device_str.compare("NEON") == 0) { } else if (device_str.compare("NEON") == 0) {
return DeviceType::NEON; return DeviceType::NEON;
...@@ -55,6 +56,53 @@ DeviceType ParseDeviceType(const string &device_str) { ...@@ -55,6 +56,53 @@ DeviceType ParseDeviceType(const string &device_str) {
} }
} }
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) {
LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
<< ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);
}
if (prev.ordblks != curr.ordblks) {
LOG(INFO) << "Number of free chunks: " << curr.ordblks
<< ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
}
if (prev.smblks != curr.smblks) {
LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
<< ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
}
if (prev.hblks != curr.hblks) {
LOG(INFO) << "Number of mmapped regions: " << curr.hblks
<< ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
}
if (prev.hblkhd != curr.hblkhd) {
LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
<< ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
}
if (prev.usmblks != curr.usmblks) {
LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
<< ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
}
if (prev.fsmblks != curr.fsmblks) {
LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
<< ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
}
if (prev.uordblks != curr.uordblks) {
LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
<< ", diff: "
<< ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
}
if (prev.fordblks != curr.fordblks) {
LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
<< ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
}
if (prev.keepcost != curr.keepcost) {
LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
<< ", diff: "
<< ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
}
return curr;
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
string model_file; string model_file;
string input_node; string input_node;
...@@ -64,6 +112,7 @@ int main(int argc, char **argv) { ...@@ -64,6 +112,7 @@ int main(int argc, char **argv) {
string output_file; string output_file;
string device; string device;
int round = 1; int round = 1;
int malloc_check_cycle = -1;
std::vector<Flag> flag_list = { std::vector<Flag> flag_list = {
Flag("model", &model_file, "model file name"), Flag("model", &model_file, "model file name"),
...@@ -74,6 +123,8 @@ int main(int argc, char **argv) { ...@@ -74,6 +123,8 @@ int main(int argc, char **argv) {
Flag("output_file", &output_file, "output file name"), Flag("output_file", &output_file, "output file name"),
Flag("device", &device, "CPU/NEON"), Flag("device", &device, "CPU/NEON"),
Flag("round", &round, "round"), Flag("round", &round, "round"),
Flag("malloc_check_cycle", &malloc_check_cycle,
"malloc debug check cycle, -1 to disable"),
}; };
string usage = Flags::Usage(argv[0], flag_list); string usage = Flags::Usage(argv[0], flag_list);
...@@ -107,7 +158,8 @@ int main(int argc, char **argv) { ...@@ -107,7 +158,8 @@ int main(int argc, char **argv) {
DeviceType device_type = ParseDeviceType(device); DeviceType device_type = ParseDeviceType(device);
VLOG(1) << "Device Type" << device_type; VLOG(1) << "Device Type" << device_type;
int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>()); int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<int64_t>());
std::unique_ptr<float[]> input_data(new float[input_size]); std::unique_ptr<float[]> input_data(new float[input_size]);
// load input // load input
...@@ -136,8 +188,13 @@ int main(int argc, char **argv) { ...@@ -136,8 +188,13 @@ int main(int argc, char **argv) {
if (round > 0) { if (round > 0) {
VLOG(0) << "Run model"; VLOG(0) << "Run model";
t0 = utils::NowMicros(); t0 = utils::NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < round; ++i) { for (int i = 0; i < round; ++i) {
engine.Run(input_data.get(), shape, output_shape); engine.Run(input_data.get(), shape, output_shape);
if (malloc_check_cycle >= 1 && i % malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
}
} }
t1 = utils::NowMicros(); t1 = utils::NowMicros();
LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us"; LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us";
...@@ -146,9 +203,10 @@ int main(int argc, char **argv) { ...@@ -146,9 +203,10 @@ int main(int argc, char **argv) {
const float *output = engine.Run(input_data.get(), shape, output_shape); const float *output = engine.Run(input_data.get(), shape, output_shape);
if (output != nullptr) { if (output != nullptr) {
ofstream out_file(output_file, ios::binary); ofstream out_file(output_file, ios::binary);
int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int64_t>()); int64_t output_size =
out_file.write((const char *) (output), std::accumulate(output_shape.begin(), output_shape.end(), 1,
output_size * sizeof(float)); std::multiplies<int64_t>());
out_file.write((const char *)(output), output_size * sizeof(float));
out_file.flush(); out_file.flush();
out_file.close(); out_file.close();
stringstream ss; stringstream ss;
......
...@@ -10,6 +10,7 @@ if [ $# -lt 2 ];then ...@@ -10,6 +10,7 @@ if [ $# -lt 2 ];then
exit -1 exit -1
fi fi
VLOG_LEVEL=0
TF_MODEL_FILE_PATH=$1 TF_MODEL_FILE_PATH=$1
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH}) MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd` MACE_SOURCE_DIR=`/bin/pwd`
...@@ -60,7 +61,7 @@ build_and_run() ...@@ -60,7 +61,7 @@ build_and_run()
fi fi
adb </dev/null shell MACE_TUNING=${tuning_flag} \ adb </dev/null shell MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=0 \ MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \ MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \ MACE_KERNEL_PATH=$KERNEL_DIR \
${PHONE_DATA_DIR}/mace_run \ ${PHONE_DATA_DIR}/mace_run \
...@@ -81,7 +82,7 @@ python tools/validate.py --generate_data true --random_seed 1 \ ...@@ -81,7 +82,7 @@ python tools/validate.py --generate_data true --random_seed 1 \
echo "Step 2: Convert tf model to mace model and optimize memory" echo "Step 2: Convert tf model to mace model and optimize memory"
bazel build //mace/python/tools:tf_converter bazel build //mace/python/tools:tf_converter
rm -rf ${CODEGEN_DIR}/models rm -rf ${MODEL_CODEGEN_DIR}
mkdir -p ${MODEL_CODEGEN_DIR} mkdir -p ${MODEL_CODEGEN_DIR}
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \ --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册