提交 ea6193aa 编写于 作者: Y yejianwu

merge with master

......@@ -3,3 +3,8 @@ tags
.idea/
cmake-build-debug/
*.pyc
mace/codegen/models/
mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/tuning/
mace/codegen/version/
......@@ -53,6 +53,7 @@ class CPUAllocator : public Allocator {
public:
~CPUAllocator() override {}
void *New(size_t nbytes) override {
VLOG(3) << "Allocate CPU buffer: " << nbytes;
void *data = nullptr;
#ifdef __ANDROID__
data = memalign(kMaceAlignment, nbytes);
......@@ -67,11 +68,18 @@ class CPUAllocator : public Allocator {
// Image allocation entry point of the CPU allocator.
//
// The request is reported through LOG(FATAL) and nullptr is returned; shape
// and dt are accepted to match the overridden signature but are never read.
void *NewImage(const std::vector<size_t> &shape, const DataType dt) override {
  LOG(FATAL) << "Allocate CPU image";
  return nullptr;
}
void Delete(void *data) override { free(data); }
void DeleteImage(void *data) override { free(data); };
// Frees a CPU buffer: emits a level-3 verbose log line, then hands data to
// free().
void Delete(void *data) override {
  VLOG(3) << "Free CPU buffer";
  free(data);
}
// Reports an attempt to free a CPU image via LOG(FATAL), then passes data to
// free(). This allocator's NewImage is itself a LOG(FATAL), so no CPU image
// should exist to be freed.
// NOTE(review): if LOG(FATAL) terminates the process, the free() below is
// never reached — confirm before relying on it.
void DeleteImage(void *data) override {
  LOG(FATAL) << "Free CPU image";
  free(data);
}
// Returns buffer unchanged; nbytes is not consulted.
void *Map(void *buffer, size_t nbytes) override {
  return buffer;
}
void *MapImage(void *buffer,
const std::vector<size_t> &image_shape,
......
......@@ -36,6 +36,7 @@ OpenCLAllocator::OpenCLAllocator() {}
OpenCLAllocator::~OpenCLAllocator() {}
void *OpenCLAllocator::New(size_t nbytes) {
VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
cl_int error;
cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
......@@ -48,6 +49,7 @@ void *OpenCLAllocator::New(size_t nbytes) {
void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
const DataType dt) {
MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2";
VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1];
cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
......@@ -64,6 +66,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
}
void OpenCLAllocator::Delete(void *buffer) {
VLOG(3) << "Free OpenCL buffer";
if (buffer != nullptr) {
cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
delete cl_buffer;
......@@ -71,6 +74,7 @@ void OpenCLAllocator::Delete(void *buffer) {
}
void OpenCLAllocator::DeleteImage(void *buffer) {
VLOG(3) << "Free OpenCL image";
if (buffer != nullptr) {
cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
delete cl_image;
......
......@@ -337,13 +337,13 @@ OpenCLLibraryImpl *openclLibraryImpl = nullptr;
} // namespace
void LoadOpenCLLibrary() {
if (openclLibraryImpl == nullptr) {
openclLibraryImpl = new OpenCLLibraryImpl();
MACE_CHECK(openclLibraryImpl->Load());
}
MACE_CHECK(openclLibraryImpl == nullptr);
openclLibraryImpl = new OpenCLLibraryImpl();
MACE_CHECK(openclLibraryImpl->Load());
}
void UnloadOpenCLLibrary() {
MACE_CHECK_NOTNULL(openclLibraryImpl);
openclLibraryImpl->Unload();
delete openclLibraryImpl;
openclLibraryImpl = nullptr;
......
......@@ -13,13 +13,14 @@
* --output_file=mace.out \
* --device=NEON
*/
#include <malloc.h>
#include <cstdlib>
#include <fstream>
#include <numeric>
#include <iostream>
#include <cstdlib>
#include <numeric>
#include "mace/utils/command_line_flags.h"
#include "mace/utils/logging.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
#include "mace/core/public/mace.h"
#include "mace/core/public/version.h"
......@@ -45,7 +46,7 @@ void ParseShape(const string &str, vector<int64_t> *shape) {
}
DeviceType ParseDeviceType(const string &device_str) {
if(device_str.compare("CPU") == 0) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("NEON") == 0) {
return DeviceType::NEON;
......@@ -56,6 +57,53 @@ DeviceType ParseDeviceType(const string &device_str) {
}
}
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) {
LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
<< ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);
}
if (prev.ordblks != curr.ordblks) {
LOG(INFO) << "Number of free chunks: " << curr.ordblks
<< ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
}
if (prev.smblks != curr.smblks) {
LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
<< ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
}
if (prev.hblks != curr.hblks) {
LOG(INFO) << "Number of mmapped regions: " << curr.hblks
<< ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
}
if (prev.hblkhd != curr.hblkhd) {
LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
<< ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
}
if (prev.usmblks != curr.usmblks) {
LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
<< ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
}
if (prev.fsmblks != curr.fsmblks) {
LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
<< ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
}
if (prev.uordblks != curr.uordblks) {
LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
<< ", diff: "
<< ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
}
if (prev.fordblks != curr.fordblks) {
LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
<< ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
}
if (prev.keepcost != curr.keepcost) {
LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
<< ", diff: "
<< ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
}
return curr;
}
int main(int argc, char **argv) {
string model_file;
string input_node;
......@@ -66,6 +114,7 @@ int main(int argc, char **argv) {
string output_file;
string device;
int round = 1;
int malloc_check_cycle = -1;
std::vector<Flag> flag_list = {
Flag("model", &model_file, "model file name"),
......@@ -77,6 +126,8 @@ int main(int argc, char **argv) {
Flag("output_file", &output_file, "output file name"),
Flag("device", &device, "CPU/NEON"),
Flag("round", &round, "round"),
Flag("malloc_check_cycle", &malloc_check_cycle,
"malloc debug check cycle, -1 to disable"),
};
string usage = Flags::Usage(argv[0], flag_list);
......@@ -145,8 +196,13 @@ int main(int argc, char **argv) {
if (round > 0) {
VLOG(0) << "Run model";
t0 = utils::NowMicros();
struct mallinfo prev = mallinfo();
for (int i = 0; i < round; ++i) {
engine.Run(input_data.get(), input_shape_vec, output_data.get());
if (malloc_check_cycle >= 1 && i % malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
}
}
t1 = utils::NowMicros();
LOG(INFO) << "Avg duration: " << (t1 - t0) / round << " us";
......
......@@ -10,6 +10,7 @@ if [ $# -lt 2 ];then
exit -1
fi
VLOG_LEVEL=0
TF_MODEL_FILE_PATH=$1
MODEL_DIR=$(dirname ${TF_MODEL_FILE_PATH})
MACE_SOURCE_DIR=`/bin/pwd`
......@@ -60,7 +61,7 @@ build_and_run()
fi
adb </dev/null shell MACE_TUNING=${tuning_flag} \
MACE_CPP_MIN_VLOG_LEVEL=0 \
MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH=$KERNEL_DIR \
${PHONE_DATA_DIR}/mace_run \
......@@ -82,7 +83,7 @@ python tools/validate.py --generate_data true --random_seed 1 \
echo "Step 2: Convert tf model to mace model and optimize memory"
bazel build //mace/python/tools:tf_converter
rm -rf ${CODEGEN_DIR}/models
rm -rf ${MODEL_CODEGEN_DIR}
mkdir -p ${MODEL_CODEGEN_DIR}
bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册