From 7915815b97f96500cf084090bc10b9e0439c76bc Mon Sep 17 00:00:00 2001
From: wangliu
Date: Fri, 20 Jul 2018 11:23:10 +0800
Subject: [PATCH] add quantification tool to compress binary size

---
 CMakeLists.txt                      |   5 +
 src/framework/program/program.h     |   1 +
 src/io/executor.cpp                 |  25 +++-
 src/io/loader.cpp                   |  10 +-
 src/io/loader.h                     |   5 +-
 src/io/paddle_mobile.cpp            |   8 +-
 src/io/paddle_mobile.h              |   4 +-
 test/net/test_googlenet.cpp         |   2 +-
 tools/quantification/CMakeLists.txt |   5 +
 tools/quantification/convert.cpp    | 202 ++++++++++++++++++++++++++++
 10 files changed, 250 insertions(+), 17 deletions(-)
 create mode 100644 tools/quantification/CMakeLists.txt
 create mode 100644 tools/quantification/convert.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa547e6d85..e0b495b96f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(LOG_PROFILE "log profile" ON)
 option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
+option(QUANTI "quantification" OFF)
 
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -153,3 +154,7 @@ if(DEBUGING)
     endif()
 endif()
 
+if (QUANTI)
+    add_subdirectory(tools/quantification)
+endif ()
+
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 5760efc826..e500d50034 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;
 
 private:
 };
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 480f48290c..08eb787bcb 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
 
   tensor->Resize(framework::make_ddim(desc.Dims()));
 
-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
@@ -179,11 +179,26 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }
-
-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
-  }
-  (*data) += (sizeof(char) * memory_size * type_size);
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+  }
 }
 
 template <typename Dtype, Precision P>
diff --git a/src/io/loader.cpp b/src/io/loader.cpp
index 51e007a6ab..c9c8c59749 100644
--- a/src/io/loader.cpp
+++ b/src/io/loader.cpp
@@ -44,9 +44,9 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
+    const std::string &dirname, bool optimize, bool quantification, bool can_add_split) {
   auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+      this->LoadProgram(dirname + "/__model__", optimize, quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }
@@ -54,16 +54,17 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
     const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    bool optimize, bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);
   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification, bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -82,6 +83,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
 
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;
 
   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
diff --git a/src/io/loader.h b/src/io/loader.h
index 5e3c53dc9d..512cee831f 100644
--- a/src/io/loader.h
+++ b/src/io/loader.h
@@ -30,6 +30,7 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &dirname,
                                           bool optimize = false,
+                                          bool quantification = false,
                                           bool can_add_split = false);
 
   /*
@@ -38,11 +39,13 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &model_path,
                                           const std::string &para_path,
-                                          bool optimize = false);
+                                          bool optimize = false,
+                                          bool quantification = false);
 
 private:
   const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                  bool optimize = false,
+                                                 bool quantification = false,
                                                  bool can_add_split = false);
 };
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cabdd799a0..e455f9acd2 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -25,7 +25,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 };
 
 template <typename Dtype, Precision P>
-bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
+bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize, bool quantification,
                                   int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -45,7 +45,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
-                                  const std::string &para_path, bool optimize,
+                                  const std::string &para_path, bool optimize, bool quantification,
                                   int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
@@ -55,7 +55,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 74c1147156..b035bc5204 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -38,7 +38,7 @@ class PaddleMobile {
    * @b load separate format fluid model
    * @b 加载分开形式的 fluid 模型
    * */
-  bool Load(const std::string &dirname, bool optimize = false,
+  bool Load(const std::string &dirname, bool optimize = false, bool quantification = false,
             int batch_size = 1);
 
   /*
@@ -46,7 +46,7 @@ class PaddleMobile {
    * @b load combine format fluid model
    * @b 加载结合在一起格式的模型
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, int batch_size = 1);
+            bool optimize = false, bool quantification = false, int batch_size = 1);
 
   void SetThreadNum(int num);
 
   /*
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index d230b94692..76ae028ac8 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -21,7 +21,7 @@ int main() {
   paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
+  if (paddle_mobile.Load(g_googlenet, optimize, true)) {
     auto time2 = time();
     DLOG << "load cost: " << time_diff(time1, time1) << "ms";
     std::vector<float> input;
diff --git a/tools/quantification/CMakeLists.txt b/tools/quantification/CMakeLists.txt
new file mode 100644
index 0000000000..1dfb9ee056
--- /dev/null
+++ b/tools/quantification/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(dir ${CMAKE_CURRENT_SOURCE_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
+
+ADD_EXECUTABLE(convert convert.cpp)
+target_link_libraries(convert paddle-mobile)
\ No newline at end of file
diff --git a/tools/quantification/convert.cpp b/tools/quantification/convert.cpp
new file mode 100644
index 0000000000..7a9511a654
--- /dev/null
+++ b/tools/quantification/convert.cpp
@@ -0,0 +1,202 @@
+
+
+#include "io/paddle_mobile.h"
+#include <string>
+using std::string;
+
+static const std::string g_googlenet_combine = "../models/googlenet_combine";
+static const std::string g_googlenet = "../models/googlenet";
+using paddle_mobile::Executor;
+using paddle_mobile::framework::Program;
+
+char *Get_binary_data(std::string filename) {
+  FILE *file = fopen(filename.c_str(), "rb");
+  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
+                        filename.c_str());
+  fseek(file, 0, SEEK_END);
+  int64_t size = ftell(file);
+  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
+  rewind(file);
+  char *data = new char[size];
+  size_t bytes_read = fread(data, 1, size, file);
+  PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                        "read binary file bytes do not match with fseek");
+  DLOG << "Get_binary_data end";
+  fclose(file);
+  return data;
+}
+
+void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
+                  paddle_mobile::framework::LoDTensor *tensor, char **data, FILE *out_file) {
+  // 1. version
+  uint32_t version = *reinterpret_cast<uint32_t *>(*data);
+  // write version
+  fwrite(&version, sizeof(uint32_t), 1, out_file);
+  (*data) += sizeof(uint32_t);
+  // 2. LoD information
+  uint64_t *lod_level_ptr = new uint64_t();
+  memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
+  uint64_t lod_level = 0;
+  // write lod information
+  fwrite(&lod_level, sizeof(uint64_t), 1, out_file);
+  delete lod_level_ptr;
+  (*data) += sizeof(uint64_t);
+  auto &lod = *tensor->mutable_lod();
+  lod.resize(lod_level);
+  for (uint64_t i = 0; i < lod_level; ++i) {
+    uint64_t size = *reinterpret_cast<uint64_t *>(*data);
+    // write lod size
+    fwrite(&size, sizeof(uint64_t), 1, out_file);
+    (*data) += sizeof(uint64_t);
+    std::vector<size_t> tmp(size / sizeof(size_t));
+    for (int k = 0; k < tmp.size(); ++k) {
+      tmp[k] = *reinterpret_cast<size_t *>(*data);
+      (*data) += sizeof(size_t);
+    }
+    // write lod size vector
+    fwrite(tmp.data(), sizeof(size_t), tmp.size(), out_file);
+
+    lod[i] = tmp;
+  }
+
+  // 3. tensor version
+  uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
+  // write tensor version
+  fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);
+  (*data) += sizeof(uint32_t);
+
+  // 4. tensor desc
+  int32_t size = *reinterpret_cast<int32_t *>(*data);
+  // write tensor desc
+  fwrite(&size, sizeof(int32_t), 1, out_file);
+  (*data) += sizeof(int32_t);
+
+  std::unique_ptr<char[]> buf(new char[size]);
+  for (int m = 0; m < size; ++m) {
+    buf.get()[m] = (*data)[m];
+  }
+  fwrite(buf.get(), sizeof(char), size, out_file);
+  (*data) += (sizeof(char) * size);
+
+  const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
+  int memory_size = 1;
+  for (auto l : desc.Dims()) {
+    memory_size *= l;
+  }
+  tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));
+
+  void *memory = tensor;
+  int type_size = 0;
+  switch (desc.DataType()) {
+    case paddle_mobile::framework::VARTYPE_TYPE_FP16:
+      type_size = 2;
+      break;
+    case paddle_mobile::framework::VARTYPE_TYPE_FP32:
+      type_size = 4;
+      memory = tensor->mutable_data<float>();
+      break;
+    case paddle_mobile::framework::VARTYPE_TYPE_FP64:
+      type_size = 8;
+      break;
+    case paddle_mobile::framework::VARTYPE_TYPE_INT32:
+      type_size = 4;
+      break;
+    case paddle_mobile::framework::VARTYPE_TYPE_INT64:
+      type_size = 8;
+      break;
+    case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
+      type_size = 1;
+      break;
+    default:
+      break;
+  }
+  for (int n = 0; n < memory_size * type_size; ++n) {
+    static_cast<char *>(memory)[n] = (*data)[n];
+  }
+  (*data) += (sizeof(char) * memory_size * type_size);
+  // for float 32
+  float min_value = std::numeric_limits<float>::max();
+  float max_value = std::numeric_limits<float>::lowest();
+  for (int k = 0; k < memory_size; ++k) {
+    min_value = std::min(min_value, static_cast<float *>(memory)[k]);
+    max_value = std::max(max_value, static_cast<float *>(memory)[k]);
+  }
+  fwrite(&min_value, sizeof(float), 1, out_file);
+  fwrite(&max_value, sizeof(float), 1, out_file);
+  for (int g = 0; g < memory_size; ++g) {
+    float value = static_cast<float *>(memory)[g];
+    uint8_t factor = (uint8_t)round((value - min_value) / (max_value - min_value) * 255);
+    fwrite(&factor, sizeof(uint8_t), 1, out_file);
+  }
+}
+
+void quantificate_combined(std::string model_path, std::string param_path, std::string param_min_path) {
+  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+  bool optimize = true;
+  auto program = loader.Load(model_path, param_path, optimize);
+  char *origin_data = Get_binary_data(program.para_path);
+  char *data = origin_data;
+  FILE *out_file = fopen(param_min_path.c_str(), "wb");
+  for (const auto &block : program.originProgram->Blocks()) {
+    for (const auto &var_desc : block->Vars()) {
+      auto var = program.scope->Var(var_desc->Name());
+      if (var_desc->Persistable()) {
+        auto tensor = var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          continue;
+        }
+        LoadWithDump(*var_desc, tensor, &data, out_file);
+      }
+    }
+  }
+  fclose(out_file);
+  delete[] origin_data;
+}
+
+void quantificate_seperated(std::string model_dir, std::string param_min_path) {
+  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+  bool optimize = true;
+  auto program = loader.Load(model_dir, optimize);
+  std::string shell_command = "mkdir " + param_min_path;
+  system(shell_command.c_str());
+  for (const auto &block : program.originProgram->Blocks()) {
+    for (const auto &var_desc : block->Vars()) {
+      auto var = program.scope->Var(var_desc->Name());
+      if (var_desc->Persistable()) {
+        auto tensor = var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          continue;
+        }
+        std::string file_name = param_min_path + "/" + var_desc->Name();
+
+        FILE *out_file = fopen(file_name.c_str(), "wb");
+        char *origin_data =
+            Get_binary_data(program.model_path + "/" + var_desc->Name());
+        char *data = origin_data;
+        LoadWithDump(*var_desc, tensor, &data, out_file);
+        delete[] origin_data;
+        fclose(out_file);
+      }
+    }
+  }
+
+}
+
+int main() {
+  std::string filename = "params_min";
+  std::string model_path = g_googlenet_combine + "/model";
+  std::string param_path = g_googlenet_combine + "/params";
+  std::string dirname = "param_min_dir";
+  std::string model_dir = g_googlenet;
+  // quantificate_combined(model_path, param_path, filename);
+  quantificate_seperated(model_dir, dirname);
+
+  return 0;
+}
+
+
-- 
GitLab
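The storage format introduced by this patch can be summarized as: convert.cpp writes each persistable FP32 tensor as a float min, a float max, and then one uint8 per element, quantized as round((v - min) / (max - min) * 255); the new branch in Executor::LoadMemory restores each element as q * (max - min) / 255 + min. Below is a minimal standalone sketch of that round trip, assuming nothing from the paddle-mobile API; the helper names are illustrative only and do not appear in the patch.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Quantize: record the per-tensor min/max once, then one byte per weight.
// Assumes the value range is not degenerate (max > min).
static void QuantizeToUint8(const std::vector<float> &src, float *min_value,
                            float *max_value, std::vector<uint8_t> *dst) {
  *min_value = *std::min_element(src.begin(), src.end());
  *max_value = *std::max_element(src.begin(), src.end());
  const float range = *max_value - *min_value;
  dst->resize(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    (*dst)[i] = static_cast<uint8_t>(
        std::round((src[i] - *min_value) / range * 255.0f));
  }
}

// Dequantize: mirrors the arithmetic of the quantification branch added to
// Executor::LoadMemory (q * factor + min, with factor = (max - min) / 255).
static void DequantizeToFloat(const std::vector<uint8_t> &src, float min_value,
                              float max_value, std::vector<float> *dst) {
  const float factor = (max_value - min_value) / 255.0f;
  dst->resize(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    (*dst)[i] = src[i] * factor + min_value;
  }
}

int main() {
  std::vector<float> weights = {-0.8f, -0.1f, 0.0f, 0.35f, 1.2f};
  float min_value = 0.f, max_value = 0.f;
  std::vector<uint8_t> quantized;
  QuantizeToUint8(weights, &min_value, &max_value, &quantized);

  std::vector<float> restored;
  DequantizeToFloat(quantized, min_value, max_value, &restored);

  // Each weight is recovered to within one quantization step,
  // i.e. (max - min) / 255, which is why the parameter file shrinks
  // roughly 4x while inference accuracy is largely preserved.
  for (size_t i = 0; i < weights.size(); ++i) {
    std::printf("%f -> %u -> %f\n", weights[i],
                static_cast<unsigned>(quantized[i]), restored[i]);
  }
  return 0;
}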