diff --git a/.gitignore b/.gitignore index 7543585c33046943ce017c07a07882bdbd989968..e24bb3325979c9e6151ae090b853f733d2151dea 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,8 @@ tags .idea/ cmake-build-debug/ *.pyc +mace/codegen/models/ +mace/codegen/opencl/ +mace/codegen/opencl_bin/ +mace/codegen/tuning/ +mace/codegen/version/ diff --git a/mace/core/allocator.h b/mace/core/allocator.h index d20c5cef7eb1ad76f24a98d71106277db8ed4324..36ef202935037af7f0b2dc46e7bae8d8c4aa9efd 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -53,6 +53,7 @@ class CPUAllocator : public Allocator { public: ~CPUAllocator() override {} void *New(size_t nbytes) override { + VLOG(3) << "Allocate CPU buffer: " << nbytes; void *data = nullptr; #ifdef __ANDROID__ data = memalign(kMaceAlignment, nbytes); @@ -67,11 +68,18 @@ class CPUAllocator : public Allocator { void *NewImage(const std::vector &shape, const DataType dt) override { + LOG(FATAL) << "Allocate CPU image"; return nullptr; } - void Delete(void *data) override { free(data); } - void DeleteImage(void *data) override { free(data); }; + void Delete(void *data) override { + VLOG(3) << "Free CPU buffer"; + free(data); + } + void DeleteImage(void *data) override { + LOG(FATAL) << "Free CPU image"; + free(data); + }; void *Map(void *buffer, size_t nbytes) override { return buffer; } void *MapImage(void *buffer, const std::vector &image_shape, diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 9c8b5ceee552a0a2d8df5bf62eb6608fdd44c47d..280b84d38659605c29ee71c6f479747bd506abac 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -36,6 +36,7 @@ OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {} void *OpenCLAllocator::New(size_t nbytes) { + VLOG(3) << "Allocate OpenCL buffer: " << nbytes; cl_int error; cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, @@ -48,6 +49,7 @@ void *OpenCLAllocator::New(size_t nbytes) { void *OpenCLAllocator::NewImage(const std::vector &image_shape, const DataType dt) { MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; + VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1]; cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt)); @@ -64,6 +66,7 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, } void OpenCLAllocator::Delete(void *buffer) { + VLOG(3) << "Free OpenCL buffer"; if (buffer != nullptr) { cl::Buffer *cl_buffer = static_cast(buffer); delete cl_buffer; @@ -71,6 +74,7 @@ void OpenCLAllocator::Delete(void *buffer) { } void OpenCLAllocator::DeleteImage(void *buffer) { + VLOG(3) << "Free OpenCL image"; if (buffer != nullptr) { cl::Image2D *cl_image = static_cast(buffer); delete cl_image; diff --git a/mace/core/runtime/opencl/opencl_wrapper.cc b/mace/core/runtime/opencl/opencl_wrapper.cc index 34d8da3156934b48d481fbe2b67a4cb8b4764fbc..cb64f77991fd14bd33281889a6660481e2f9a1b3 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.cc +++ b/mace/core/runtime/opencl/opencl_wrapper.cc @@ -337,13 +337,13 @@ OpenCLLibraryImpl *openclLibraryImpl = nullptr; } // namespace void LoadOpenCLLibrary() { - if (openclLibraryImpl == nullptr) { - openclLibraryImpl = new OpenCLLibraryImpl(); - MACE_CHECK(openclLibraryImpl->Load()); - } + MACE_CHECK(openclLibraryImpl == nullptr); + openclLibraryImpl = new OpenCLLibraryImpl(); + MACE_CHECK(openclLibraryImpl->Load()); } void UnloadOpenCLLibrary() { + MACE_CHECK_NOTNULL(openclLibraryImpl); openclLibraryImpl->Unload(); delete openclLibraryImpl; openclLibraryImpl = nullptr; diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index 7a02b8efca692a67ef1167f1e2032500a8a83732..3de72de9289a419b98f9122c0f196f2fdb101b75 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -12,13 +12,14 @@ * --output_file=mace.out \ * --device=NEON */ +#include +#include #include -#include #include -#include +#include #include "mace/utils/command_line_flags.h" -#include "mace/utils/logging.h" #include "mace/utils/env_time.h" +#include "mace/utils/logging.h" #include "mace/core/public/mace.h" #include "mace/core/public/version.h" @@ -44,7 +45,7 @@ void ParseShape(const string &str, vector *shape) { } DeviceType ParseDeviceType(const string &device_str) { - if(device_str.compare("CPU") == 0) { + if (device_str.compare("CPU") == 0) { return DeviceType::CPU; } else if (device_str.compare("NEON") == 0) { return DeviceType::NEON; @@ -55,6 +56,53 @@ DeviceType ParseDeviceType(const string &device_str) { } } +struct mallinfo LogMallinfoChange(struct mallinfo prev) { + struct mallinfo curr = mallinfo(); + if (prev.arena != curr.arena) { + LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena + << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena); + } + if (prev.ordblks != curr.ordblks) { + LOG(INFO) << "Number of free chunks: " << curr.ordblks + << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks); + } + if (prev.smblks != curr.smblks) { + LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks + << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks); + } + if (prev.hblks != curr.hblks) { + LOG(INFO) << "Number of mmapped regions: " << curr.hblks + << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks); + } + if (prev.hblkhd != curr.hblkhd) { + LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd + << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd); + } + if (prev.usmblks != curr.usmblks) { + LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks + << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks); + } + if (prev.fsmblks != curr.fsmblks) { + LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks + << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks); + } + if (prev.uordblks != curr.uordblks) { + LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks + << ", diff: " + << ((int64_t)curr.uordblks - (int64_t)prev.uordblks); + } + if (prev.fordblks != curr.fordblks) { + LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: " + << ((int64_t)curr.fordblks - (int64_t)prev.fordblks); + } + if (prev.keepcost != curr.keepcost) { + LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost + << ", diff: " + << ((int64_t)curr.keepcost - (int64_t)prev.keepcost); + } + return curr; +} + int main(int argc, char **argv) { string model_file; string input_node; @@ -64,6 +112,7 @@ int main(int argc, char **argv) { string output_file; string device; int round = 1; + int malloc_check_cycle = -1; std::vector flag_list = { Flag("model", &model_file, "model file name"), @@ -74,6 +123,8 @@ int main(int argc, char **argv) { Flag("output_file", &output_file, "output file name"), Flag("device", &device, "CPU/NEON"), Flag("round", &round, "round"), + Flag("malloc_check_cycle", &malloc_check_cycle, + "malloc debug check cycle, -1 to disable"), }; string usage = Flags::Usage(argv[0], flag_list); @@ -107,7 +158,8 @@ int main(int argc, char **argv) { DeviceType device_type = ParseDeviceType(device); VLOG(1) << "Device Type" << device_type; - int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + int64_t input_size = std::accumulate(shape.begin(), shape.end(), 1, + std::multiplies()); std::unique_ptr input_data(new float[input_size]); // load input @@ -136,8 +188,13 @@ int main(int argc, char **argv) { if (round > 0) { VLOG(0) << "Run model"; t0 = utils::NowMicros(); + struct mallinfo prev = mallinfo(); for (int i = 0; i < round; ++i) { engine.Run(input_data.get(), shape, output_shape); + if (malloc_check_cycle >= 1 && i % malloc_check_cycle == 0) { + LOG(INFO) << "=== check malloc info change #" << i << " ==="; + prev = LogMallinfoChange(prev); + } } t1 = utils::NowMicros(); LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us"; @@ -146,9 +203,10 @@ int main(int argc, char **argv) { const float *output = engine.Run(input_data.get(), shape, output_shape); if (output != nullptr) { ofstream out_file(output_file, ios::binary); - int64_t output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); - out_file.write((const char *) (output), - output_size * sizeof(float)); + int64_t output_size = + std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + out_file.write((const char *)(output), output_size * sizeof(float)); out_file.flush(); out_file.close(); stringstream ss; diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh index d31d098626ad6d6531660daaa824e514147b7b30..645df8b586e255394900dbb6f6b5fa2b9e6f7675 100644 --- a/tools/validate_gcn.sh +++ b/tools/validate_gcn.sh @@ -10,6 +10,7 @@ if [ $# -lt 2 ];then exit -1 fi +VLOG_LEVEL=0 TF_MODEL_FILE_PATH=$1 MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH}) MACE_SOURCE_DIR=`/bin/pwd` @@ -60,7 +61,7 @@ build_and_run() fi adb