diff --git a/CMakeLists.txt b/CMakeLists.txt
index 27bc208bae35f2f10b4331512c3668993adf1a80..b54878b365463d4daae8b6e52e81dd58d7d89817 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(LOG_PROFILE "log profile" ON)
 option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
+option(QUANTI "quantification" OFF)
 
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -152,3 +153,7 @@ if(DEBUGING)
     endif()
 endif()
 
+if (QUANTI)
+  add_subdirectory(tools/quantification)
+endif ()
+
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 5760efc826667d805695118b12e41efa0305553b..e500d500344d83204bf388401541259b90ea2f78 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;
 
  private:
 };
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 480f48290cc1bbf4888832d76187a13a4915ec40..65f019d1e3c3f6f6bdb8a18a9ff99bb7ecb2012c 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
 
   tensor->Resize(framework::make_ddim(desc.Dims()));
 
-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
@@ -179,11 +179,25 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }
-
-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
   }
-  (*data) += (sizeof(char) * memory_size * type_size);
 }
 
 template <typename Dtype, Precision P>
diff --git a/src/io/loader.cpp b/src/io/loader.cpp
index 51e007a6ab4bce415628649a40f711903bceee92..9ed877d05d51dfbe7139ea2289fdb6480c62f88f 100644
--- a/src/io/loader.cpp
+++ b/src/io/loader.cpp
@@ -44,26 +44,29 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
-  auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+    const std::string &dirname, bool optimize, bool quantification,
+    bool can_add_split) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize,
+                                   quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    const std::string &model_path, const std::string &para_path, bool optimize,
+    bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);
 
   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification,
+    bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -82,6 +85,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
 
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;
 
   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
diff --git a/src/io/loader.h b/src/io/loader.h
index 5e3c53dc9db858f506a13d2105339038340344a6..512cee831f0a09f8223c07c531eb9d1c74e75d92 100644
--- a/src/io/loader.h
+++ b/src/io/loader.h
@@ -30,6 +30,7 @@ class Loader {
    * */
  const framework::Program<Dtype, P> Load(const std::string &dirname,
                                          bool optimize = false,
+                                         bool quantification = false,
                                          bool can_add_split = false);
 
  /*
@@ -38,11 +39,13 @@ class Loader {
    * */
  const framework::Program<Dtype, P> Load(const std::string &model_path,
                                          const std::string &para_path,
-                                         bool optimize = false);
+                                         bool optimize = false,
+                                         bool quantification = false);
 
 private:
  const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                 bool optimize = false,
+                                                bool quantification = false,
                                                 bool can_add_split = false);
};
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cabdd799a0e7d561d8bc56c0913f1389c38f8907..5e2e209d64aa7a00b56a5bdbbff88cb3097b7b94 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -26,7 +26,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -46,7 +46,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
                                   const std::string &para_path, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -55,7 +55,8 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification),
+        batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 0c4dfa9782458b03c7d5c4660e15e89937142dc7..5dc3ccb21dd7e67fbe9b5032d01046b12728dc64 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -39,15 +39,15 @@ class PaddleMobile {
    * @b 加载分开形式的 fluid 模型
    * */
   bool Load(const std::string &dirname, bool optimize = false,
-            int batch_size = 1);
+            bool quantification = false, int batch_size = 1);
 
   /*
    * @b load combine format fluid mode
    * @b 加载结合在一起格式的模型
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, int batch_size = 1);
-
+            bool optimize = false, bool quantification = false,
+            int batch_size = 1);
   /*
    * @b 设置线程数, 当 cmake 中开启 openmp 时生效
    * */
diff --git a/tools/quantification/CMakeLists.txt b/tools/quantification/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1dfb9ee056a4126f65c2ab6fac4c1417039f66ec
--- /dev/null
+++ b/tools/quantification/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(dir ${CMAKE_CURRENT_SOURCE_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
+
+ADD_EXECUTABLE(convert convert.cpp)
+target_link_libraries(convert paddle-mobile)
\ No newline at end of file
diff --git a/tools/quantification/convert.cpp b/tools/quantification/convert.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a9511a654f3de9ac9ace5d3b9621c360bd86ad9
--- /dev/null
+++ b/tools/quantification/convert.cpp
@@ -0,0 +1,202 @@
+
+
+#include "io/paddle_mobile.h"
+#include <string>
+using std::string;
+
+static const std::string g_googlenet_combine = "../models/googlenet_combine";
+static const std::string g_googlenet = "../models/googlenet";
+using paddle_mobile::Executor;
+using paddle_mobile::framework::Program;
+
+char *Get_binary_data(std::string filename) {
+    FILE *file = fopen(filename.c_str(), "rb");
+    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
+                          filename.c_str());
+    fseek(file, 0, SEEK_END);
+    int64_t size = ftell(file);
+    PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
+    rewind(file);
+    char *data = new char[size];
+    size_t bytes_read = fread(data, 1, size, file);
+    PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                          "read binary file bytes do not match with fseek");
+    DLOG << "Get_binary_data end";
+    fclose(file);
+    return data;
+}
+
+void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
+                  paddle_mobile::framework::LoDTensor *tensor, char **data,
+                  FILE *out_file) {
+    // 1. version
+    uint32_t version = *reinterpret_cast<uint32_t *>(*data);
+    // write version
+    fwrite(&version, sizeof(uint32_t), 1, out_file);
+    (*data) += sizeof(uint32_t);
+    // 2. lod information
+    uint64_t *lod_level_ptr = new uint64_t();
+    memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
+    uint64_t lod_level = 0;
+    // write lod information
+    fwrite(&lod_level, sizeof(uint64_t), 1, out_file);
+    delete lod_level_ptr;
+    (*data) += sizeof(uint64_t);
+    auto &lod = *tensor->mutable_lod();
+    lod.resize(lod_level);
+    for (uint64_t i = 0; i < lod_level; ++i) {
+        uint64_t size = *reinterpret_cast<uint64_t *>(*data);
+        // write lod size
+        fwrite(&size, sizeof(uint64_t), 1, out_file);
+        (*data) += sizeof(uint64_t);
+        std::vector<size_t> tmp(size / sizeof(size_t));
+        for (int k = 0; k < tmp.size(); ++k) {
+            tmp[k] = *reinterpret_cast<size_t *>(*data);
+            (*data) += sizeof(size_t);
+        }
+        // write lod size vector
+        fwrite(&tmp, sizeof(size_t), tmp.size(), out_file);
+
+        lod[i] = tmp;
+    }
+
+    // 3. tensor version
+    uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
+    // write tensor version
+    fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);
+    (*data) += sizeof(uint32_t);
+
+    // 4. tensor desc
+    int32_t size = *reinterpret_cast<int32_t *>(*data);
+    // write tensor desc
+    fwrite(&size, sizeof(int32_t), 1, out_file);
+    (*data) += sizeof(int32_t);
+
+    std::unique_ptr<char[]> buf(new char[size]);
+    for (int m = 0; m < size; ++m) {
+        buf.get()[m] = (*data)[m];
+    }
+    fwrite(buf.get(), sizeof(char), size, out_file);
+    (*data) += (sizeof(char) * size);
+
+    const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
+    int memory_size = 1;
+    for (auto l : desc.Dims()) {
+        memory_size *= l;
+    }
+    tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));
+
+    void *memory = tensor;
+    int type_size = 0;
+    switch (desc.DataType()) {
+        case paddle_mobile::framework::VARTYPE_TYPE_FP16:
+            type_size = 2;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP32:
+            type_size = 4;
+            memory = tensor->mutable_data<float>();
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT32:
+            type_size = 4;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
+            type_size = 1;
+            break;
+        default:
+            break;
+    }
+    for (int n = 0; n < memory_size * type_size; ++n) {
+        static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+    // for float 32
+    float min_value = std::numeric_limits<float>::max();
+    float max_value = std::numeric_limits<float>::min();
+    for (int k = 0; k < memory_size; ++k) {
+        min_value = std::min(min_value, static_cast<float *>(memory)[k]);
+        max_value = std::max(max_value, static_cast<float *>(memory)[k]);
+    }
+    fwrite(&min_value, sizeof(float), 1, out_file);
+    fwrite(&max_value, sizeof(float), 1, out_file);
+    for (int g = 0; g < memory_size; ++g) {
+        float value = static_cast<float *>(memory)[g];
+        uint8_t factor =
+            (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+        fwrite(&factor, sizeof(uint8_t), 1, out_file);
+    }
+}
+
+void quantificate_combined(std::string model_path, std::string param_path,
+                           std::string param_min_path) {
+    paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+    bool optimize = true;
+    auto program = loader.Load(model_path, param_path, optimize);
+    char *origin_data = Get_binary_data(program.para_path);
+    char *data = origin_data;
+    FILE *out_file = fopen(param_min_path.c_str(), "wb");
+    for (const auto &block : program.originProgram->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            auto var = program.scope->Var(var_desc->Name());
+            if (var_desc->Persistable()) {
+                auto tensor =
+                    var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                LoadWithDump(*var_desc, tensor, &data, out_file);
+            }
+        }
+    }
+    fclose(out_file);
+    delete origin_data;
+}
+
+void quantificate_seperated(std::string model_dir, std::string param_min_path) {
+    paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+    bool optimize = true;
+    auto program = loader.Load(model_dir, optimize);
+    std::string shell_command = "mkdir " + param_min_path;
+    system(shell_command.c_str());
+    for (const auto &block : program.originProgram->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            auto var = program.scope->Var(var_desc->Name());
+            if (var_desc->Persistable()) {
+                auto tensor =
+                    var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                std::string file_name = param_min_path + "/" + var_desc->Name();
+
+                FILE *out_file = fopen(file_name.c_str(), "wb");
+                char *origin_data =
+                    Get_binary_data(program.model_path + "/" + var_desc->Name());
+                char *data = origin_data;
+                LoadWithDump(*var_desc, tensor, &data, out_file);
+                delete origin_data;
+                fclose(out_file);
+            }
+        }
+    }
+
+}
+int main() {
+    std::string filename = "params_min";
+    std::string model_path = g_googlenet_combine + "/model";
+    std::string param_path = g_googlenet_combine + "/params";
+    std::string dirname = "param_min_dir";
+    std::string model_dir = g_googlenet;
+//  quantificate_combined(model_path, param_path, filename);
+    quantificate_seperated(model_dir, dirname);
+
+    return 0;
+}
+
+
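
Note on the storage format introduced above: for each persistable tensor, convert.cpp writes the original tensor header, then the float32 min and max of the weights, then one uint8 per weight scaled into [0, 255]; Executor::LoadMemory reverses this with factor = (max - min) / 255. The snippet below is a minimal standalone sketch of that round trip, for reference only. The helper names QuantizeWeights/DequantizeWeights are illustrative and not part of the patch, and a constant tensor (max == min) would need special handling, which the patch itself also does not provide.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Encode the way tools/quantification/convert.cpp does: the caller persists
// min_v and max_v (two floats) followed by one uint8 per weight.
std::vector<uint8_t> QuantizeWeights(const std::vector<float> &w,
                                     float *min_v, float *max_v) {
    *min_v = *std::min_element(w.begin(), w.end());
    *max_v = *std::max_element(w.begin(), w.end());
    std::vector<uint8_t> q(w.size());
    for (size_t i = 0; i < w.size(); ++i) {
        q[i] = static_cast<uint8_t>(
            std::round((w[i] - *min_v) / (*max_v - *min_v) * 255.0f));
    }
    return q;
}

// Decode the way Executor::LoadMemory does when program_.quantification is set.
std::vector<float> DequantizeWeights(const std::vector<uint8_t> &q,
                                     float min_v, float max_v) {
    const float factor = (max_v - min_v) / 255.0f;
    std::vector<float> w(q.size());
    for (size_t i = 0; i < q.size(); ++i) {
        w[i] = q[i] * factor + min_v;
    }
    return w;
}

Each weight therefore costs 1 byte on disk instead of 4, at the price of a worst-case reconstruction error of roughly (max - min) / 510 per element.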
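For completeness, a possible caller-side flow once the tool has been built with the new QUANTI option and run over a combined-format model: pass quantification = true through the new Load overload so that LoadMemory takes the 8-bit path. This is only a usage sketch; the explicit template arguments and the paths (taken from the defaults in convert.cpp's main) are assumptions, not something the patch prescribes.

#include "io/paddle_mobile.h"

int main() {
    // Instantiation assumed to mirror the one used by the convert tool above.
    paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> engine;

    // Combined-format model whose params were rewritten by tools/quantification/convert:
    // optimize = true, quantification = true, batch_size = 1.
    bool ok = engine.Load("../models/googlenet_combine/model",  // combined model file
                          "params_min",                         // 8-bit params from convert
                          true, true, 1);
    return ok ? 0 : 1;
}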