From 7e9bb98a37cb703b320eff078128a3d82aec9b11 Mon Sep 17 00:00:00 2001 From: Yanzhan Yang Date: Wed, 9 Oct 2019 21:20:40 +0800 Subject: [PATCH] add n-fold quantification algorithm (#2164) * 1. add quantification_fold parameter. 2. support quantification test in run.py. * implement n-fold quantification --- mobile/src/framework/executor.cpp | 84 ++++++++--------- mobile/src/framework/loader.cpp | 25 +++-- mobile/src/framework/loader.h | 19 ++-- mobile/src/framework/program/program.h | 1 + mobile/src/io/paddle_inference_api.h | 1 + mobile/src/io/paddle_mobile.cpp | 27 +++--- mobile/src/io/paddle_mobile.h | 7 +- mobile/test/net/test_net.cpp | 6 +- mobile/test/net/test_op_in_net.cpp | 2 +- mobile/tools/pre-commit.hooks/cpplint.hook | 2 +- mobile/tools/python/fluidtools/run.py | 27 +++++- mobile/tools/quantification/convert.cpp | 105 ++++++++++++--------- 12 files changed, 181 insertions(+), 125 deletions(-) diff --git a/mobile/src/framework/executor.cpp b/mobile/src/framework/executor.cpp index c1ff6ee29b..0d25596af0 100644 --- a/mobile/src/framework/executor.cpp +++ b/mobile/src/framework/executor.cpp @@ -173,24 +173,33 @@ void Executor::InitFeedFetchList() { } template -static void LoadMemInternal(void **data, LoDTensor *tensor, - bool quant_uint8 = false) { - char **data_buf = reinterpret_cast(data); - int64_t size = tensor->numel(); - T *tensor_data = tensor->mutable_data(); +static void LoadMemInternal(void **in_data, void *out_data, int64_t size, + bool quant_uint8 = false, int quant_fold = 1) { + char **data_buf = reinterpret_cast(in_data); + T *tensor_data = reinterpret_cast(out_data); if (quant_uint8) { - // should be moved into operator init function - float min_value; - float max_value; - memory::Copy(&min_value, *data_buf, sizeof(float)); - memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float)); - *data_buf += 2 * sizeof(float); - const float factor = (max_value - min_value) / 255.0; - const uint8_t *uint8_data = reinterpret_cast(*data_buf); - for (int k = 0; k < size; ++k) { - tensor_data[k] = uint8_data[k] * factor + min_value; + int step = fmax(size / quant_fold, 1); + int visited_fold = 0; + while (visited_fold * step < size) { + // should be moved into operator init function + float min_value; + float max_value; + memory::Copy(&min_value, *data_buf, sizeof(float)); + memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float)); + *data_buf += 2 * sizeof(float); + const float factor = (max_value - min_value) / 255.0; + const uint8_t *uint8_data = reinterpret_cast(*data_buf); + int k = 0; + for (; k < step; ++k) { + int tensor_data_idx = visited_fold * step + k; + if (tensor_data_idx >= size) { + break; + } + tensor_data[tensor_data_idx] = uint8_data[k] * factor + min_value; + } + *data_buf += k * sizeof(uint8_t); + visited_fold++; } - *data_buf += size * sizeof(uint8_t); } else { memory::Copy(tensor_data, *data_buf, size * sizeof(T)); *data_buf += size * sizeof(T); @@ -235,14 +244,20 @@ void Executor::LoadMemory(void **data, // parse tensor from stream switch (tensor_desc.DataType()) { case VARTYPE_TYPE_FP32: - LoadMemInternal(reinterpret_cast(data_buf), tensor, - program_.quantification); + LoadMemInternal( + reinterpret_cast(data_buf), + reinterpret_cast(tensor->mutable_data()), tensor->numel(), + program_.quantification, program_.quantification_fold); break; case VARTYPE_TYPE_INT8: - LoadMemInternal(reinterpret_cast(data_buf), tensor); + LoadMemInternal( + reinterpret_cast(data_buf), + reinterpret_cast(tensor->mutable_data()), tensor->numel()); break; case 
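For reference, the n-fold dequantization that the reworked LoadMemInternal performs can be sketched as a standalone function. This is an illustrative rewrite, not the library code: the name DequantizeNFold is hypothetical, and the byte layout (per fold, two floats for min/max followed by up to `step` uint8 codes) is taken from the hunk above.

#include <algorithm>
#include <cstdint>
#include <cstring>

// Illustrative sketch of the n-fold dequantization done by LoadMemInternal.
// `buf` points at the quantized blob written by the converter: for each fold,
// two floats (min, max) followed by up to `step` uint8 codes.
static void DequantizeNFold(const char *buf, float *out, int64_t size,
                            int quant_fold) {
  const int64_t step = std::max<int64_t>(size / quant_fold, 1);
  const char *p = buf;
  int64_t written = 0;
  while (written < size) {
    float min_value, max_value;
    std::memcpy(&min_value, p, sizeof(float));
    std::memcpy(&max_value, p + sizeof(float), sizeof(float));
    p += 2 * sizeof(float);
    const float factor = (max_value - min_value) / 255.0f;
    const uint8_t *codes = reinterpret_cast<const uint8_t *>(p);
    const int64_t n = std::min(step, size - written);  // last fold may be short
    for (int64_t k = 0; k < n; ++k) {
      out[written + k] = codes[k] * factor + min_value;
    }
    p += n;       // advance past this fold's uint8 codes
    written += n;
  }
}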
VARTYPE_TYPE_INT32: - LoadMemInternal(reinterpret_cast(data_buf), tensor); + LoadMemInternal(reinterpret_cast(data_buf), + reinterpret_cast(tensor->mutable_data()), + tensor->numel()); break; default: LOG(kLOG_ERROR) << "data type is not supported"; @@ -944,31 +959,10 @@ void Executor::LoadMemory(const VarDesc var_desc, void *memory = nullptr; int type_size = 4; memory = tensorInput; - if (program_.quantification) { - float min_value; - float max_value; - - memcpy(&min_value, *data, sizeof(float)); - memcpy(&max_value, *data + sizeof(float), sizeof(float)); - *data += 2 * sizeof(float); - const float factor = (max_value - min_value) / 255.0; - uint8_t *uint8_data = reinterpret_cast(*data); - for (int k = 0; k < memory_size; ++k) { - static_cast(memory)[k] = uint8_data[k] * factor + min_value; - } - *data += (memory_size * sizeof(uint8_t)); - } else { - for (int n = 0; n < memory_size; n++) { - float value; - memcpy(&value, *data + n * type_size, type_size); - if (value < 1e-30 && value > -1e-30) { - static_cast(memory)[n] = 0.0; - } else { - static_cast(memory)[n] = value; - } - } - (*data) += (sizeof(char) * memory_size * type_size); - } + + LoadMemInternal(reinterpret_cast(data), + reinterpret_cast(memory), memory_size, + program_.quantification, program_.quantification_fold); } template <> diff --git a/mobile/src/framework/loader.cpp b/mobile/src/framework/loader.cpp index 4350fda969..34cf6253cb 100644 --- a/mobile/src/framework/loader.cpp +++ b/mobile/src/framework/loader.cpp @@ -87,7 +87,8 @@ void Loader::InitMemoryFromProgram( template <> const Program Loader::LoadCombinedMemory( size_t read_size, const uint8_t *buf, size_t combined_params_len, - uint8_t *combined_params_buf, bool optimize, bool quantification) { + uint8_t *combined_params_buf, bool optimize, bool quantification, + int quantification_fold) { bool can_add_split = false; PaddleMobile__Framework__Proto__ProgramDesc *c_program; @@ -109,6 +110,7 @@ const Program Loader::LoadCombinedMemory( program.quantification = quantification; program.combined_params_len = combined_params_len; program.combined_params_buf = combined_params_buf; + program.quantification_fold = quantification_fold; auto scope = std::make_shared(); program.scope = scope; @@ -187,9 +189,11 @@ template const Program Loader::Load(const std::string &dirname, bool optimize, bool quantification, - bool can_add_split) { - auto program = this->LoadProgram(dirname + "/__model__", optimize, - quantification, can_add_split); + bool can_add_split, + int quantification_fold) { + auto program = + this->LoadProgram(dirname + "/__model__", optimize, quantification, + can_add_split, quantification_fold); program.model_path = dirname; return program; } @@ -198,8 +202,10 @@ template const Program Loader::Load(const std::string &model_path, const std::string ¶_path, bool optimize, - bool quantification) { - auto program = this->LoadProgram(model_path, optimize, quantification); + bool quantification, + int quantification_fold) { + auto program = this->LoadProgram(model_path, optimize, quantification, false, + quantification_fold); program.para_path = para_path; program.combined = true; @@ -210,7 +216,7 @@ const Program Loader::Load(const std::string &model_path, template const Program Loader::LoadProgram( const std::string &model_path, bool optimize, bool quantification, - bool can_add_split) { + bool can_add_split, int quantification_fold) { std::string model_filename = model_path; PaddleMobile__Framework__Proto__ProgramDesc *c_program; uint8_t *buf = NULL; @@ -232,6 +238,7 
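The extra folds cost very little on disk: each fold only adds its own (min, max) pair, eight bytes, on top of the one byte per weight. A back-of-the-envelope helper, with a hypothetical name, under the same step arithmetic as the patch:

#include <algorithm>
#include <cstdint>

// Rough size of one quantized tensor blob under the n-fold scheme:
// per fold, 2 floats (min, max) plus up to `step` uint8 codes,
// where step = max(numel / fold, 1).
static int64_t QuantizedBlobBytes(int64_t numel, int fold) {
  const int64_t step = std::max<int64_t>(numel / fold, 1);
  const int64_t folds_used = (numel + step - 1) / step;  // ceil(numel / step)
  return folds_used * 2 * static_cast<int64_t>(sizeof(float)) + numel;
}
// Example: 1,000,000 weights with fold = 1000 take 1,000,000 + 1000 * 8
// = 1,008,000 bytes, versus 1,000,008 bytes for the original single range.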
@@ const Program Loader::LoadProgram( program.quantification = quantification; program.combined_params_len = 0; program.combined_params_buf = nullptr; + program.quantification_fold = quantification_fold; auto scope = std::make_shared(); program.scope = scope; @@ -248,7 +255,8 @@ const Program Loader::LoadProgram( template const Program Loader::LoadCombinedMemory( size_t read_size, const uint8_t *buf, size_t combined_params_len, - uint8_t *combined_params_buf, bool optimize, bool quantification) { + uint8_t *combined_params_buf, bool optimize, bool quantification, + int quantification_fold) { bool can_add_split = false; PaddleMobile__Framework__Proto__ProgramDesc *c_program; @@ -270,6 +278,7 @@ const Program Loader::LoadCombinedMemory( program.quantification = quantification; program.combined_params_len = combined_params_len; program.combined_params_buf = combined_params_buf; + program.quantification_fold = quantification_fold; auto scope = std::make_shared(); program.scope = scope; diff --git a/mobile/src/framework/loader.h b/mobile/src/framework/loader.h index bd4dfa1556..40ded643d5 100644 --- a/mobile/src/framework/loader.h +++ b/mobile/src/framework/loader.h @@ -32,7 +32,8 @@ class Loader { const Program Load(const std::string &dirname, bool optimize = false, bool quantification = false, - bool can_add_split = false); + bool can_add_split = false, + int quantification_fold = 1); /* * @b load combine format fluid mode @@ -41,20 +42,20 @@ class Loader { const Program Load(const std::string &model_path, const std::string ¶_path, bool optimize = false, - bool quantification = false); + bool quantification = false, + int quantification_fold = 1); - const Program LoadCombinedMemory(size_t model_len, - const uint8_t *model_buf, - size_t combined_params_len, - uint8_t *combined_params_buf, - bool optimize = false, - bool quantification = false); + const Program LoadCombinedMemory( + size_t model_len, const uint8_t *model_buf, size_t combined_params_len, + uint8_t *combined_params_buf, bool optimize = false, + bool quantification = false, int quantification_fold = 1); private: const Program LoadProgram(const std::string &model_path, bool optimize = false, bool quantification = false, - bool can_add_split = false); + bool can_add_split = false, + int quantification_fold = 1); void InitMemoryFromProgram( const std::shared_ptr &originProgramDesc, diff --git a/mobile/src/framework/program/program.h b/mobile/src/framework/program/program.h index f05aba8565..b6d1d96279 100644 --- a/mobile/src/framework/program/program.h +++ b/mobile/src/framework/program/program.h @@ -34,6 +34,7 @@ class Program { bool quantification = false; size_t combined_params_len; uint8_t *combined_params_buf; + int quantification_fold = 1; }; } // namespace framework diff --git a/mobile/src/io/paddle_inference_api.h b/mobile/src/io/paddle_inference_api.h index 5c104db41f..dccfd1ceca 100644 --- a/mobile/src/io/paddle_inference_api.h +++ b/mobile/src/io/paddle_inference_api.h @@ -216,6 +216,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config { int batch_size = 1; bool optimize = true; bool quantification = false; + int quantification_fold = 1; bool lod_mode = false; int thread_num = 1; bool load_when_predict = false; diff --git a/mobile/src/io/paddle_mobile.cpp b/mobile/src/io/paddle_mobile.cpp index 95ae3763a2..be69ce0f63 100644 --- a/mobile/src/io/paddle_mobile.cpp +++ b/mobile/src/io/paddle_mobile.cpp @@ -37,7 +37,8 @@ void PaddleMobile::SetThreadNum(int thread_num, template PMStatus PaddleMobile::Load(const 
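Every new parameter defaults to quantification_fold = 1, which reduces to the previous single-range format: the step becomes the whole tensor, so exactly one (min, max) pair is read or written per parameter. A minimal check of that arithmetic, assuming the integer division used in the patch:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int64_t size = 123457;                          // any tensor size
  const int64_t step = std::max<int64_t>(size / 1, 1);  // fold = 1
  assert(step == size);                   // one fold covers the whole tensor
  assert((size + step - 1) / step == 1);  // exactly one (min, max) pair
  return 0;
}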
std::string &dirname, bool optimize, bool quantification, - int batch_size, bool lod_mode) { + int batch_size, bool lod_mode, + int quantification_fold) { if (loader_.get() == nullptr) { loader_ = std::make_shared>(); } else { @@ -46,8 +47,9 @@ PMStatus PaddleMobile::Load(const std::string &dirname, if (executor_.get() == nullptr) { executor_ = std::make_shared>( - loader_->Load(dirname, optimize, quantification), config_, batch_size, - optimize, lod_mode); + loader_->Load(dirname, optimize, quantification, false, + quantification_fold), + config_, batch_size, optimize, lod_mode); } else { LOG(kLOG_INFO) << "executor inited"; } @@ -59,7 +61,8 @@ template PMStatus PaddleMobile::Load(const std::string &model_path, const std::string ¶_path, bool optimize, bool quantification, - int batch_size, bool lod_mode) { + int batch_size, bool lod_mode, + int quantification_fold) { if (loader_.get() == nullptr) { loader_ = std::make_shared>(); } else { @@ -69,8 +72,9 @@ PMStatus PaddleMobile::Load(const std::string &model_path, if (executor_.get() == nullptr) { executor_ = std::make_shared>( - loader_->Load(model_path, para_path, optimize, quantification), config_, - batch_size, optimize, lod_mode); + loader_->Load(model_path, para_path, optimize, quantification, + quantification_fold), + config_, batch_size, optimize, lod_mode); } else { LOG(kLOG_INFO) << "executor inited"; } @@ -82,11 +86,12 @@ template PMStatus PaddleMobile::Load(const PaddleMobileConfig &config) { if (!config.model_dir.empty()) { return this->Load(config.model_dir, config.optimize, config.quantification, - config.batch_size, config.lod_mode); + config.batch_size, config.lod_mode, + config.quantification_fold); } else if (!config.prog_file.empty() && !config.param_file.empty()) { return this->Load(config.prog_file, config.param_file, config.optimize, - config.quantification, config.batch_size, - config.lod_mode); + config.quantification, config.batch_size, config.lod_mode, + config.quantification_fold); } else { LOG(kLOG_ERROR) << "Failed to load inference model"; return PMNotInitialized; @@ -97,7 +102,7 @@ template bool PaddleMobile::LoadCombinedMemory( size_t model_len, const uint8_t *model_buf, size_t combined_params_len, uint8_t *combined_params_buf, bool optimize, bool quantification, - int batch_size, bool lod_mode) { + int batch_size, bool lod_mode, int quantification_fold) { if (loader_.get() == nullptr) { loader_ = std::make_shared>(); } else { @@ -107,7 +112,7 @@ bool PaddleMobile::LoadCombinedMemory( executor_ = std::make_shared>( loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len, combined_params_buf, optimize, - quantification), + quantification, quantification_fold), config_, batch_size, optimize, lod_mode); } else { LOG(kLOG_INFO) << "executor inited"; diff --git a/mobile/src/io/paddle_mobile.h b/mobile/src/io/paddle_mobile.h index e39d712447..8b8f0683ab 100644 --- a/mobile/src/io/paddle_mobile.h +++ b/mobile/src/io/paddle_mobile.h @@ -50,10 +50,11 @@ class PaddleMobile { PMStatus Load(const std::string &dirname, const bool optimize = false, const bool quantification = false, const int batch_size = 1, - const bool lod_mode = false); + const bool lod_mode = false, const int quantification_fold = 1); PMStatus Load(const std::string &model_path, const std::string ¶_path, const bool optimize = false, const bool quantification = false, - const int batch_size = 1, const bool lod_mode = false); + const int batch_size = 1, const bool lod_mode = false, + const int quantification_fold = 1); PMStatus Load(const 
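A caller-side sketch of passing the fold count through the public Load overload added above. The CPU/float template arguments, the default-constructed object, and the include path are assumptions inferred from the surrounding diff rather than verified against the tree; only the parameter order comes directly from the new signature.

#include "io/paddle_mobile.h"  // assumed include path

void LoadQuantizedModel() {
  // Assumed CPU/float instantiation of the PaddleMobile template.
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> mobile;
  // Combined model/params files produced by the quantification tool.
  mobile.Load("./checked_model/model", "./checked_model/params",
              /*optimize=*/true, /*quantification=*/true,
              /*batch_size=*/1, /*lod_mode=*/true,
              /*quantification_fold=*/1000);
}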
PaddleMobileConfig &config); @@ -84,7 +85,7 @@ class PaddleMobile { size_t combined_params_len, uint8_t *combined_params_buf, bool optimize = false, bool quantification = false, int batch_size = 1, - bool lod_mode = false); + bool lod_mode = false, int quantification_fold = 1); void SetThreadNum(int thread_num, PowerMode power_mode = PERFORMANCE_PRIORITY); diff --git a/mobile/test/net/test_net.cpp b/mobile/test/net/test_net.cpp index a1c234dbca..74a124e46c 100644 --- a/mobile/test/net/test_net.cpp +++ b/mobile/test/net/test_net.cpp @@ -31,6 +31,10 @@ void test(int argc, char *argv[]) { arg_index++; bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1; arg_index++; + bool quantification = std::stoi(argv[arg_index]) == 1; + arg_index++; + int quantification_fold = std::stoi(argv[arg_index]); + arg_index++; paddle_mobile::PaddleMobileConfigInternal config; config.memory_optimization_level = enable_memory_optimization ? MemoryOptimizationWithoutFeeds @@ -98,7 +102,7 @@ void test(int argc, char *argv[]) { auto time1 = time(); if (paddle_mobile.Load("./checked_model/model", "./checked_model/params", - fuse, false, 1, true)) { + fuse, quantification, 1, true, quantification_fold)) { auto time2 = time(); std::cout << "auto-test" << " load-time-cost :" << time_diff(time1, time2) << "ms" diff --git a/mobile/test/net/test_op_in_net.cpp b/mobile/test/net/test_op_in_net.cpp index 4666f4133c..9425c02762 100644 --- a/mobile/test/net/test_op_in_net.cpp +++ b/mobile/test/net/test_op_in_net.cpp @@ -58,7 +58,7 @@ void test(int argc, char *argv[]) { auto time1 = time(); if (paddle_mobile.Load("./checked_model/model", "./checked_model/params", - fuse, false, 1, true)) { + fuse, false, 1, true, 1)) { auto time2 = time(); std::cout << "auto-test" << " load-time-cost :" << time_diff(time1, time2) << "ms" diff --git a/mobile/tools/pre-commit.hooks/cpplint.hook b/mobile/tools/pre-commit.hooks/cpplint.hook index 78ca3cfcdd..3740e64c73 100644 --- a/mobile/tools/pre-commit.hooks/cpplint.hook +++ b/mobile/tools/pre-commit.hooks/cpplint.hook @@ -5,7 +5,7 @@ TOTAL_ERRORS=0 # The trick to remove deleted files: https://stackoverflow.com/a/2413151 for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \ grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \ - grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do + grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "^mobile/tools/quantification"); do cpplint $file; TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); done diff --git a/mobile/tools/python/fluidtools/run.py b/mobile/tools/python/fluidtools/run.py index a77943e2af..6fa5842009 100644 --- a/mobile/tools/python/fluidtools/run.py +++ b/mobile/tools/python/fluidtools/run.py @@ -22,6 +22,8 @@ checked_encrypt_model_path = "checked_encrypt_model" output_var_filter = [] output_key_filter = {} check_shape = False +quantification = False +quantification_fold = 1000 architecture = "arm-v7a" # architecture = "arm-v8a" @@ -107,7 +109,8 @@ def resave_model(feed_kv): for name in p_names: v = fluid.framework._get_var(name, prog) v.persistable = False - fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params") + if not quantification: + fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params") if has_found_wrong_shape: 
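test_net now expects two extra positional arguments, the quantification flag and the fold count, ahead of the feed description; the run.py change in the next hunk supplies them. The test binary reads them unconditionally, so a hedged sketch of a more defensive reader (hypothetical helper, same order and defaults) looks like this:

#include <string>

struct QuantArgs {
  bool quantification = false;   // default: quantification disabled
  int quantification_fold = 1;   // default: single-range behaviour
};

// Reads the two new positional test-net arguments if they are present.
static QuantArgs ParseQuantArgs(int argc, char *argv[], int &arg_index) {
  QuantArgs args;
  if (arg_index < argc) args.quantification = std::stoi(argv[arg_index++]) == 1;
  if (arg_index < argc) args.quantification_fold = std::stoi(argv[arg_index++]);
  return args;
}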
pp_red("has found wrong shape", 1) else: @@ -392,7 +395,7 @@ for op in ops: pp_tab("op types : {}".format(op_types), 1) def check_mobile_results(args, fuse, mem_opt): - args = "{} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", args) + args = "{} {} {} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", "1" if quantification else "0", quantification_fold, args) res = sh("adb shell \"cd {} && export LD_LIBRARY_PATH=. && ./test-net {}\"".format(mobile_exec_root, args)) lines = res.split("\n") # for line in lines: @@ -425,6 +428,26 @@ def check_mobile_results(args, fuse, mem_opt): fetch_names = [] for fetch in fetches: fetch_names.append(fetch.name) + fetch_diff = 0.0 + fetch_count = 0 + for index in op_cache: + op_output_var_name, op = op_cache[index] + if not op_output_var_name in output_var_cache: + continue + if not op_output_var_name in mobile_var_cache: + continue + if op_output_var_name not in fetch_names: + continue + values1 = output_var_cache[op_output_var_name] + values2 = mobile_var_cache[op_output_var_name] + shape = get_var_shape(op_output_var_name) if check_shape else [] + for i in range(len(values1)): + v1 = values1[i] + v2 = values2[len(shape) + i] + fetch_diff += abs(v1 - v2) + fetch_count += 1 + if fetch_count != 0: + pp_yellow("output avg diff : {}".format(fetch_diff / fetch_count), 1) for index in op_cache: op_output_var_name, op = op_cache[index] if mem_opt: diff --git a/mobile/tools/quantification/convert.cpp b/mobile/tools/quantification/convert.cpp index 3473f9a118..22be4ce5b9 100644 --- a/mobile/tools/quantification/convert.cpp +++ b/mobile/tools/quantification/convert.cpp @@ -68,7 +68,7 @@ std::shared_ptr loadParams(const std::string &model_path) { } -void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) { +void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) { // 1. 
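The new run.py check accumulates the absolute difference between the fluid reference values and the on-device values for every fetch output and prints the average. The same metric in C++ form, as an illustrative helper only; the offset parameter stands in for the len(shape) prefix that the mobile dump carries when shape checking is enabled.

#include <cmath>
#include <cstddef>
#include <vector>

// Mean absolute difference between reference and on-device fetch outputs,
// mirroring the "output avg diff" metric added to run.py.
static double OutputAvgDiff(const std::vector<float> &reference,
                            const std::vector<float> &mobile,
                            size_t mobile_offset) {
  double total = 0.0;
  size_t count = 0;
  for (size_t i = 0;
       i < reference.size() && mobile_offset + i < mobile.size(); ++i) {
    total += std::fabs(reference[i] - mobile[i + mobile_offset]);
    ++count;
  }
  return count ? total / count : 0.0;
}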
version uint32_t version = *reinterpret_cast(*dataP); @@ -162,27 +162,33 @@ void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char } *dataP += tensorSize; - // for float 32 - float min_value = std::numeric_limits::max(); - float max_value = std::numeric_limits::min(); + int step = std::max(memory_size / quantification_fold, 1); - for (int k = 0; k < memory_size; ++k) { - min_value = std::min(min_value, static_cast (memory)[k]); - max_value = std::max(max_value, static_cast (memory)[k]); - } + int visited_fold = 0; + while (visited_fold * step < memory_size) { + // for float 32 + float min_value = std::numeric_limits::max(); + float max_value = std::numeric_limits::min(); + + for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) { + min_value = std::min(min_value, static_cast (memory)[k]); + max_value = std::max(max_value, static_cast (memory)[k]); + } - fwrite(&min_value, sizeof(float), 1, out_file); - fwrite(&max_value, sizeof(float), 1, out_file); + fwrite(&min_value, sizeof(float), 1, out_file); + fwrite(&max_value, sizeof(float), 1, out_file); - for (int g = 0; g < memory_size; ++g) { - float value = static_cast (memory)[g]; - auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255); - fwrite(&factor, sizeof(uint8_t), 1, out_file); + for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) { + float value = static_cast (memory)[g]; + auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255); + fwrite(&factor, sizeof(uint8_t), 1, out_file); + } + visited_fold++; } } void -quantificate_combined_int8(const std::string &model_path, const std::string ¶m_path, const std::string ¶m_min_path) { +quantificate_combined_int8(const std::string &model_path, const std::string ¶m_path, const std::string ¶m_min_path, int quantification_fold) { auto program = loadParams(model_path); char *origin_data = Get_binary_data(param_path); char *data = origin_data; @@ -193,7 +199,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &par if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { continue; } - LoadWithDumpForInt8(*var_desc, &data, out_file); + LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold); } } } @@ -201,7 +207,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &par delete origin_data; } -void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path) { +void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path, int quantification_fold) { auto program = loadParams(model_dir + "/__model__"); std::string shell_command = "mkdir " + param_min_path; @@ -217,7 +223,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string FILE *out_file = fopen(file_name.c_str(), "wb"); char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name()); char *data = origin_data; - LoadWithDumpForInt8(*var_desc, &data, out_file); + LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold); delete origin_data; fclose(out_file); } @@ -225,7 +231,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string } } -void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) { +void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) { 
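The converter mirrors the loader: it walks the tensor in step-sized folds, writes the fold's min and max, then one rounded uint8 code per weight. A standalone sketch of that encoder follows; the name is hypothetical, min/max are seeded from the first element instead of numeric_limits, and the degenerate all-equal fold is guarded here even though the patch divides unconditionally.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative n-fold uint8 encoder matching LoadWithDumpForInt8's inner loop.
static void QuantizeNFold(const float *data, int memory_size, int fold,
                          FILE *out_file) {
  const int step = std::max(memory_size / fold, 1);
  for (int start = 0; start < memory_size; start += step) {
    const int end = std::min(start + step, memory_size);
    float min_value = data[start];
    float max_value = data[start];
    for (int k = start; k < end; ++k) {
      min_value = std::min(min_value, data[k]);
      max_value = std::max(max_value, data[k]);
    }
    fwrite(&min_value, sizeof(float), 1, out_file);
    fwrite(&max_value, sizeof(float), 1, out_file);
    const float range = max_value - min_value;
    for (int g = start; g < end; ++g) {
      // Guarded scaling; a constant fold maps every value to code 0.
      const float scaled = range > 0.f ? (data[g] - min_value) / range : 0.f;
      const uint8_t code = static_cast<uint8_t>(std::round(scaled * 255.f));
      fwrite(&code, sizeof(uint8_t), 1, out_file);
    }
  }
}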
// 1. version uint32_t version = *reinterpret_cast(*dataP); @@ -319,30 +325,36 @@ void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, c } *dataP += tensorSize; - // for float 32 - float min_value = std::numeric_limits::max(); - float max_value = std::numeric_limits::min(); + int step = std::max(memory_size / quantification_fold, 1); - for (int k = 0; k < memory_size; ++k) { - min_value = std::min(min_value, static_cast (memory)[k]); - max_value = std::max(max_value, static_cast (memory)[k]); - } + int visited_fold = 0; + while (visited_fold * step < memory_size) { + // for float 32 + float min_value = std::numeric_limits::max(); + float max_value = std::numeric_limits::min(); - float diff = 0.0; - for (int g = 0; g < memory_size; ++g) { - float value = static_cast (memory)[g]; - auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255); - float value_quantized = min_value + (factor / 255.0) * (max_value - min_value); - diff += fabs(value - value_quantized); - fwrite(&value_quantized, sizeof(float), 1, out_file); - } - if (memory_size > 0) { - std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl; + for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) { + min_value = std::min(min_value, static_cast (memory)[k]); + max_value = std::max(max_value, static_cast (memory)[k]); + } + + float diff = 0.0; + for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) { + float value = static_cast (memory)[g]; + auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255); + float value_quantized = min_value + (factor / 255.0) * (max_value - min_value); + diff += fabs(value - value_quantized); + fwrite(&value_quantized, sizeof(float), 1, out_file); + } + if (memory_size > 0) { + std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl; + } + visited_fold++; } } void -quantificate_combined_float32(const std::string &model_path, const std::string ¶m_path, const std::string ¶m_min_path) { +quantificate_combined_float32(const std::string &model_path, const std::string ¶m_path, const std::string ¶m_min_path, int quantification_fold) { auto program = loadParams(model_path); char *origin_data = Get_binary_data(param_path); char *data = origin_data; @@ -353,7 +365,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string & if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { continue; } - LoadWithDumpForFloat32(*var_desc, &data, out_file); + LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold); } } } @@ -361,7 +373,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string & delete origin_data; } -void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path) { +void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path, int quantification_fold) { auto program = loadParams(model_dir + "/__model__"); std::string shell_command = "mkdir " + param_min_path; @@ -377,7 +389,7 @@ void quantificate_seperated_float32(const std::string model_dir, const std::stri FILE *out_file = fopen(file_name.c_str(), "wb"); char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name()); char *data = origin_data; - LoadWithDumpForFloat32(*var_desc, &data, out_file); + 
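LoadWithDumpForFloat32 keeps the parameters in float32 but replaces every value with its quantized-then-dequantized counterpart, and reports the per-tensor average error; raising quantification_fold shrinks each fold's (min, max) range and therefore that error. A sketch of the same round-trip measurement for one fold, as a hypothetical helper:

#include <algorithm>
#include <cmath>

// Quantize one fold's values to uint8 and back, returning the summed absolute
// error, which is the quantity LoadWithDumpForFloat32 averages per tensor.
static double FoldRoundTripError(const float *data, int start, int end) {
  float min_value = data[start];
  float max_value = data[start];
  for (int k = start; k < end; ++k) {
    min_value = std::min(min_value, data[k]);
    max_value = std::max(max_value, data[k]);
  }
  const float range = max_value - min_value;
  double diff = 0.0;
  for (int g = start; g < end; ++g) {
    const float scaled = range > 0.f ? (data[g] - min_value) / range : 0.f;
    const float code = std::round(scaled * 255.f);
    const float dequantized = min_value + (code / 255.f) * range;
    diff += std::fabs(data[g] - dequantized);
  }
  return diff;
}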
LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold); delete origin_data; fclose(out_file); } @@ -402,10 +414,15 @@ int main(int argc, char **argv) { PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str()); std::string output_path = argv[3]; + int quantification_fold = 1; + if (argc > 4) { + quantification_fold = std::stoi(argv[4]); + } + if (action_type == "0") { // for seperated const std::string &seperated_min_dir = output_path; - quantificate_seperated_int8(base_path, seperated_min_dir); + quantificate_seperated_int8(base_path, seperated_min_dir, quantification_fold); return 0; } @@ -414,14 +431,14 @@ int main(int argc, char **argv) { const std::string &combined_min_dir = output_path; std::string model_path = base_path + "/model"; std::string param_path = base_path + "/params"; - quantificate_combined_int8(model_path, param_path, combined_min_dir); + quantificate_combined_int8(model_path, param_path, combined_min_dir, quantification_fold); return 0; } if (action_type == "2") { // for seperated const std::string &seperated_min_dir = output_path; - quantificate_seperated_float32(base_path, seperated_min_dir); + quantificate_seperated_float32(base_path, seperated_min_dir, quantification_fold); return 0; } @@ -430,7 +447,7 @@ int main(int argc, char **argv) { const std::string &combined_min_dir = output_path; std::string model_path = base_path + "/model"; std::string param_path = base_path + "/params"; - quantificate_combined_float32(model_path, param_path, combined_min_dir); + quantificate_combined_float32(model_path, param_path, combined_min_dir, quantification_fold); return 0; } -- GitLab
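Note that the fold count is not recorded in the emitted params file, so the value passed to the converter (argv[4], defaulting to 1) has to match the quantification_fold handed to PaddleMobile::Load or run.py at inference time; otherwise the loader's step size, and hence the byte offsets of each fold's min/max pair, will not line up. One way to keep the two sides in sync is a shared constant; the constant name and the converter binary name below are illustrative only.

// The converter and the runtime must agree on the fold count, since it is not
// stored in the quantized params file.
constexpr int kQuantificationFold = 1000;  // matches run.py's default

// Offline (int8, combined format, hypothetical binary name):
//   ./convert 1 <model_dir> <output_dir> 1000
// Runtime:
//   paddle_mobile.Load(model_path, param_path, optimize,
//                      /*quantification=*/true, /*batch_size=*/1,
//                      /*lod_mode=*/false, kQuantificationFold);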