Unverified commit 7e9bb98a authored by Yanzhan Yang, committed by GitHub

add n-fold quantification algorithm (#2164)

* 1. add quantification_fold parameter. 2. support quantification test in run.py.

* implement n-fold quantification
Parent 4b9df8fb
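
This commit replaces the single global min/max quantization range per tensor with one range per fold: a tensor of `size` elements is split into segments of `step = max(size / quantification_fold, 1)` elements, each segment stores its own float min/max followed by its uint8 codes, and the loader rebuilds the floats segment by segment. The standalone sketch below is illustrative only (the type and function names are mine, not the repo's) and shows the round trip implied by LoadWithDumpForInt8 and LoadMemInternal:

    // Illustrative sketch of the n-fold scheme: the tensor is cut into
    // segments of step = max(size / fold, 1) elements, each segment is
    // quantized to uint8 against its own [min, max] range, and
    // dequantization rebuilds the floats segment by segment.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    struct QuantizedFold {
      float min_value;
      float max_value;
      std::vector<uint8_t> codes;  // one uint8 code per element of the segment
    };

    std::vector<QuantizedFold> QuantizeNFold(const std::vector<float> &data,
                                             int fold) {
      const int size = static_cast<int>(data.size());
      const int step = std::max(size / fold, 1);
      std::vector<QuantizedFold> folds;
      for (int begin = 0; begin < size; begin += step) {
        const int end = std::min(begin + step, size);
        QuantizedFold f;
        f.min_value = *std::min_element(data.begin() + begin, data.begin() + end);
        f.max_value = *std::max_element(data.begin() + begin, data.begin() + end);
        const float range = f.max_value - f.min_value;
        for (int i = begin; i < end; ++i) {
          // Guard against a constant segment (range == 0), which the original
          // code does not special-case.
          const float scaled = range > 0.f ? (data[i] - f.min_value) / range : 0.f;
          f.codes.push_back(static_cast<uint8_t>(std::round(scaled * 255.0f)));
        }
        folds.push_back(f);
      }
      return folds;
    }

    std::vector<float> DequantizeNFold(const std::vector<QuantizedFold> &folds) {
      std::vector<float> out;
      for (const auto &f : folds) {
        const float factor = (f.max_value - f.min_value) / 255.0f;
        for (const uint8_t code : f.codes) {
          out.push_back(code * factor + f.min_value);  // mirrors LoadMemInternal
        }
      }
      return out;
    }

Raising the fold count (run.py now defaults it to 1000) stores more (min, max) pairs but shrinks each segment's range, which is why the "avg diff caused by quantization" printed by the tool, and the new "output avg diff" check in run.py, go down.
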
......@@ -173,24 +173,33 @@ void Executor<Device, T>::InitFeedFetchList() {
}
template <typename T>
static void LoadMemInternal(void **data, LoDTensor *tensor,
bool quant_uint8 = false) {
char **data_buf = reinterpret_cast<char **>(data);
int64_t size = tensor->numel();
T *tensor_data = tensor->mutable_data<T>();
static void LoadMemInternal(void **in_data, void *out_data, int64_t size,
bool quant_uint8 = false, int quant_fold = 1) {
char **data_buf = reinterpret_cast<char **>(in_data);
T *tensor_data = reinterpret_cast<T *>(out_data);
if (quant_uint8) {
// should be moved into operator init function
float min_value;
float max_value;
memory::Copy(&min_value, *data_buf, sizeof(float));
memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float));
*data_buf += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data_buf);
for (int k = 0; k < size; ++k) {
tensor_data[k] = uint8_data[k] * factor + min_value;
int step = fmax(size / quant_fold, 1);
int visited_fold = 0;
while (visited_fold * step < size) {
// should be moved into operator init function
float min_value;
float max_value;
memory::Copy(&min_value, *data_buf, sizeof(float));
memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float));
*data_buf += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data_buf);
int k = 0;
for (; k < step; ++k) {
int tensor_data_idx = visited_fold * step + k;
if (tensor_data_idx >= size) {
break;
}
tensor_data[tensor_data_idx] = uint8_data[k] * factor + min_value;
}
*data_buf += k * sizeof(uint8_t);
visited_fold++;
}
*data_buf += size * sizeof(uint8_t);
} else {
memory::Copy(tensor_data, *data_buf, size * sizeof(T));
*data_buf += size * sizeof(T);
......@@ -235,14 +244,20 @@ void Executor<Device, T>::LoadMemory(void **data,
// parse tensor from stream
switch (tensor_desc.DataType()) {
case VARTYPE_TYPE_FP32:
LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor,
program_.quantification);
LoadMemInternal<float>(
reinterpret_cast<void **>(data_buf),
reinterpret_cast<void *>(tensor->mutable_data<T>()), tensor->numel(),
program_.quantification, program_.quantification_fold);
break;
case VARTYPE_TYPE_INT8:
LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
LoadMemInternal<int8_t>(
reinterpret_cast<void **>(data_buf),
reinterpret_cast<void *>(tensor->mutable_data<T>()), tensor->numel());
break;
case VARTYPE_TYPE_INT32:
LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
LoadMemInternal<int>(reinterpret_cast<void **>(data_buf),
reinterpret_cast<void *>(tensor->mutable_data<T>()),
tensor->numel());
break;
default:
LOG(kLOG_ERROR) << "data type is not supported";
......@@ -944,31 +959,10 @@ void Executor<GPU_CL, float>::LoadMemory(const VarDesc var_desc,
void *memory = nullptr;
int type_size = 4;
memory = tensorInput;
if (program_.quantification) {
float min_value;
float max_value;
memcpy(&min_value, *data, sizeof(float));
memcpy(&max_value, *data + sizeof(float), sizeof(float));
*data += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
for (int k = 0; k < memory_size; ++k) {
static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
}
*data += (memory_size * sizeof(uint8_t));
} else {
for (int n = 0; n < memory_size; n++) {
float value;
memcpy(&value, *data + n * type_size, type_size);
if (value < 1e-30 && value > -1e-30) {
static_cast<float *>(memory)[n] = 0.0;
} else {
static_cast<float *>(memory)[n] = value;
}
}
(*data) += (sizeof(char) * memory_size * type_size);
}
LoadMemInternal<float>(reinterpret_cast<void **>(data),
reinterpret_cast<void *>(memory), memory_size,
program_.quantification, program_.quantification_fold);
}
template <>
......
......@@ -87,7 +87,8 @@ void Loader<GPU_CL, float>::InitMemoryFromProgram(
template <>
const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) {
uint8_t *combined_params_buf, bool optimize, bool quantification,
int quantification_fold) {
bool can_add_split = false;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
......@@ -109,6 +110,7 @@ const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
program.quantification = quantification;
program.combined_params_len = combined_params_len;
program.combined_params_buf = combined_params_buf;
program.quantification_fold = quantification_fold;
auto scope = std::make_shared<Scope>();
program.scope = scope;
......@@ -187,9 +189,11 @@ template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &dirname,
bool optimize,
bool quantification,
bool can_add_split) {
auto program = this->LoadProgram(dirname + "/__model__", optimize,
quantification, can_add_split);
bool can_add_split,
int quantification_fold) {
auto program =
this->LoadProgram(dirname + "/__model__", optimize, quantification,
can_add_split, quantification_fold);
program.model_path = dirname;
return program;
}
......@@ -198,8 +202,10 @@ template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize,
bool quantification) {
auto program = this->LoadProgram(model_path, optimize, quantification);
bool quantification,
int quantification_fold) {
auto program = this->LoadProgram(model_path, optimize, quantification, false,
quantification_fold);
program.para_path = para_path;
program.combined = true;
......@@ -210,7 +216,7 @@ const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadProgram(
const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) {
bool can_add_split, int quantification_fold) {
std::string model_filename = model_path;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL;
......@@ -232,6 +238,7 @@ const Program<Device, T> Loader<Device, T>::LoadProgram(
program.quantification = quantification;
program.combined_params_len = 0;
program.combined_params_buf = nullptr;
program.quantification_fold = quantification_fold;
auto scope = std::make_shared<Scope>();
program.scope = scope;
......@@ -248,7 +255,8 @@ const Program<Device, T> Loader<Device, T>::LoadProgram(
template <typename Device, typename T>
const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification) {
uint8_t *combined_params_buf, bool optimize, bool quantification,
int quantification_fold) {
bool can_add_split = false;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
......@@ -270,6 +278,7 @@ const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
program.quantification = quantification;
program.combined_params_len = combined_params_len;
program.combined_params_buf = combined_params_buf;
program.quantification_fold = quantification_fold;
auto scope = std::make_shared<Scope>();
program.scope = scope;
......
......@@ -32,7 +32,8 @@ class Loader {
const Program<Device, T> Load(const std::string &dirname,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
bool can_add_split = false,
int quantification_fold = 1);
/*
* @b load combine format fluid mode
......@@ -41,20 +42,20 @@ class Loader {
const Program<Device, T> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false,
bool quantification = false);
bool quantification = false,
int quantification_fold = 1);
const Program<Device, T> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf,
bool optimize = false,
bool quantification = false);
const Program<Device, T> LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize = false,
bool quantification = false, int quantification_fold = 1);
private:
const Program<Device, T> LoadProgram(const std::string &model_path,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
bool can_add_split = false,
int quantification_fold = 1);
void InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
......
......@@ -34,6 +34,7 @@ class Program {
bool quantification = false;
size_t combined_params_len;
uint8_t *combined_params_buf;
int quantification_fold = 1;
};
} // namespace framework
......
......@@ -216,6 +216,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config {
int batch_size = 1;
bool optimize = true;
bool quantification = false;
int quantification_fold = 1;
bool lod_mode = false;
int thread_num = 1;
bool load_when_predict = false;
......
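
PaddleMobileConfig grows a matching field, so an application that already goes through PaddleMobile::Load(config) only has to set it. Below is a hedged usage sketch, not part of this diff: the header path, the paddle_mobile::CPU device tag, and the model directory are assumptions about this repo's layout. The fold value must equal the one the quantification tool was run with, since the parameter file's per-fold (min, max) layout depends on it.

    // Hypothetical usage sketch (assumed header and device tag, not from the diff).
    #include "io/paddle_mobile.h"  // assumed header for PaddleMobile / PaddleMobileConfig

    int main() {
      paddle_mobile::PaddleMobileConfig config;
      config.model_dir = "./quantified_model";  // hypothetical output of the quantification tool
      config.quantification = true;             // params are stored as per-fold uint8
      config.quantification_fold = 1000;        // must match the fold count used when quantizing
      paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> engine;
      engine.Load(config);                      // forwards quantification_fold down to the Loader
      return 0;
    }
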
......@@ -37,7 +37,8 @@ void PaddleMobile<Device, T>::SetThreadNum(int thread_num,
template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
bool optimize, bool quantification,
int batch_size, bool lod_mode) {
int batch_size, bool lod_mode,
int quantification_fold) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
......@@ -46,8 +47,9 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(dirname, optimize, quantification), config_, batch_size,
optimize, lod_mode);
loader_->Load(dirname, optimize, quantification, false,
quantification_fold),
config_, batch_size, optimize, lod_mode);
} else {
LOG(kLOG_INFO) << "executor inited";
}
......@@ -59,7 +61,8 @@ template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize, bool quantification,
int batch_size, bool lod_mode) {
int batch_size, bool lod_mode,
int quantification_fold) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
......@@ -69,8 +72,9 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(model_path, para_path, optimize, quantification), config_,
batch_size, optimize, lod_mode);
loader_->Load(model_path, para_path, optimize, quantification,
quantification_fold),
config_, batch_size, optimize, lod_mode);
} else {
LOG(kLOG_INFO) << "executor inited";
}
......@@ -82,11 +86,12 @@ template <typename Device, typename T>
PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) {
if (!config.model_dir.empty()) {
return this->Load(config.model_dir, config.optimize, config.quantification,
config.batch_size, config.lod_mode);
config.batch_size, config.lod_mode,
config.quantification_fold);
} else if (!config.prog_file.empty() && !config.param_file.empty()) {
return this->Load(config.prog_file, config.param_file, config.optimize,
config.quantification, config.batch_size,
config.lod_mode);
config.quantification, config.batch_size, config.lod_mode,
config.quantification_fold);
} else {
LOG(kLOG_ERROR) << "Failed to load inference model";
return PMNotInitialized;
......@@ -97,7 +102,7 @@ template <typename Device, typename T>
bool PaddleMobile<Device, T>::LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize, bool quantification,
int batch_size, bool lod_mode) {
int batch_size, bool lod_mode, int quantification_fold) {
if (loader_.get() == nullptr) {
loader_ = std::make_shared<framework::Loader<Device, T>>();
} else {
......@@ -107,7 +112,7 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory(
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimize,
quantification),
quantification, quantification_fold),
config_, batch_size, optimize, lod_mode);
} else {
LOG(kLOG_INFO) << "executor inited";
......
......@@ -50,10 +50,11 @@ class PaddleMobile {
PMStatus Load(const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1,
const bool lod_mode = false);
const bool lod_mode = false, const int quantification_fold = 1);
PMStatus Load(const std::string &model_path, const std::string &para_path,
const bool optimize = false, const bool quantification = false,
const int batch_size = 1, const bool lod_mode = false);
const int batch_size = 1, const bool lod_mode = false,
const int quantification_fold = 1);
PMStatus Load(const PaddleMobileConfig &config);
......@@ -84,7 +85,7 @@ class PaddleMobile {
size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize = false,
bool quantification = false, int batch_size = 1,
bool lod_mode = false);
bool lod_mode = false, int quantification_fold = 1);
void SetThreadNum(int thread_num,
PowerMode power_mode = PERFORMANCE_PRIORITY);
......
......@@ -31,6 +31,10 @@ void test(int argc, char *argv[]) {
arg_index++;
bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
arg_index++;
bool quantification = std::stoi(argv[arg_index]) == 1;
arg_index++;
int quantification_fold = std::stoi(argv[arg_index]);
arg_index++;
paddle_mobile::PaddleMobileConfigInternal config;
config.memory_optimization_level = enable_memory_optimization
? MemoryOptimizationWithoutFeeds
......@@ -98,7 +102,7 @@ void test(int argc, char *argv[]) {
auto time1 = time();
if (paddle_mobile.Load("./checked_model/model", "./checked_model/params",
fuse, false, 1, true)) {
fuse, quantification, 1, true, quantification_fold)) {
auto time2 = time();
std::cout << "auto-test"
<< " load-time-cost :" << time_diff(time1, time2) << "ms"
......
......@@ -58,7 +58,7 @@ void test(int argc, char *argv[]) {
auto time1 = time();
if (paddle_mobile.Load("./checked_model/model", "./checked_model/params",
fuse, false, 1, true)) {
fuse, false, 1, true, 1)) {
auto time2 = time();
std::cout << "auto-test"
<< " load-time-cost :" << time_diff(time1, time2) << "ms"
......
......@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "^mobile/tools/quantification"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......
......@@ -22,6 +22,8 @@ checked_encrypt_model_path = "checked_encrypt_model"
output_var_filter = []
output_key_filter = {}
check_shape = False
quantification = False
quantification_fold = 1000
architecture = "arm-v7a"
# architecture = "arm-v8a"
......@@ -107,7 +109,8 @@ def resave_model(feed_kv):
for name in p_names:
v = fluid.framework._get_var(name, prog)
v.persistable = False
fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params")
if not quantification:
fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params")
if has_found_wrong_shape:
pp_red("has found wrong shape", 1)
else:
......@@ -392,7 +395,7 @@ for op in ops:
pp_tab("op types : {}".format(op_types), 1)
def check_mobile_results(args, fuse, mem_opt):
args = "{} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", args)
args = "{} {} {} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", "1" if quantification else "0", quantification_fold, args)
res = sh("adb shell \"cd {} && export LD_LIBRARY_PATH=. && ./test-net {}\"".format(mobile_exec_root, args))
lines = res.split("\n")
# for line in lines:
......@@ -425,6 +428,26 @@ def check_mobile_results(args, fuse, mem_opt):
fetch_names = []
for fetch in fetches:
fetch_names.append(fetch.name)
fetch_diff = 0.0
fetch_count = 0
for index in op_cache:
op_output_var_name, op = op_cache[index]
if not op_output_var_name in output_var_cache:
continue
if not op_output_var_name in mobile_var_cache:
continue
if op_output_var_name not in fetch_names:
continue
values1 = output_var_cache[op_output_var_name]
values2 = mobile_var_cache[op_output_var_name]
shape = get_var_shape(op_output_var_name) if check_shape else []
for i in range(len(values1)):
v1 = values1[i]
v2 = values2[len(shape) + i]
fetch_diff += abs(v1 - v2)
fetch_count += 1
if fetch_count != 0:
pp_yellow("output avg diff : {}".format(fetch_diff / fetch_count), 1)
for index in op_cache:
op_output_var_name, op = op_cache[index]
if mem_opt:
......
......@@ -68,7 +68,7 @@ std::shared_ptr<ProgramDesc> loadParams(const std::string &model_path) {
}
void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*dataP);
......@@ -162,27 +162,33 @@ void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char
}
*dataP += tensorSize;
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::min();
int step = std::max(memory_size / quantification_fold, 1);
for (int k = 0; k < memory_size; ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
int visited_fold = 0;
while (visited_fold * step < memory_size) {
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::min();
for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
fwrite(&min_value, sizeof(float), 1, out_file);
fwrite(&max_value, sizeof(float), 1, out_file);
fwrite(&min_value, sizeof(float), 1, out_file);
fwrite(&max_value, sizeof(float), 1, out_file);
for (int g = 0; g < memory_size; ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
fwrite(&factor, sizeof(uint8_t), 1, out_file);
for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
fwrite(&factor, sizeof(uint8_t), 1, out_file);
}
visited_fold++;
}
}
void
quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path, int quantification_fold) {
auto program = loadParams(model_path);
char *origin_data = Get_binary_data(param_path);
char *data = origin_data;
......@@ -193,7 +199,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &par
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadWithDumpForInt8(*var_desc, &data, out_file);
LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold);
}
}
}
......@@ -201,7 +207,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &par
delete origin_data;
}
void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path) {
void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path, int quantification_fold) {
auto program = loadParams(model_dir + "/__model__");
std::string shell_command = "mkdir " + param_min_path;
......@@ -217,7 +223,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string
FILE *out_file = fopen(file_name.c_str(), "wb");
char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
char *data = origin_data;
LoadWithDumpForInt8(*var_desc, &data, out_file);
LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold);
delete origin_data;
fclose(out_file);
}
......@@ -225,7 +231,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string
}
}
void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*dataP);
......@@ -319,30 +325,36 @@ void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, c
}
*dataP += tensorSize;
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::min();
int step = std::max(memory_size / quantification_fold, 1);
for (int k = 0; k < memory_size; ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
int visited_fold = 0;
while (visited_fold * step < memory_size) {
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::min();
float diff = 0.0;
for (int g = 0; g < memory_size; ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
diff += fabs(value - value_quantized);
fwrite(&value_quantized, sizeof(float), 1, out_file);
}
if (memory_size > 0) {
std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl;
for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
float diff = 0.0;
for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
diff += fabs(value - value_quantized);
fwrite(&value_quantized, sizeof(float), 1, out_file);
}
if (memory_size > 0) {
std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl;
}
visited_fold++;
}
}
void
quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path, int quantification_fold) {
auto program = loadParams(model_path);
char *origin_data = Get_binary_data(param_path);
char *data = origin_data;
......@@ -353,7 +365,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string &
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadWithDumpForFloat32(*var_desc, &data, out_file);
LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold);
}
}
}
......@@ -361,7 +373,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string &
delete origin_data;
}
void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path) {
void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path, int quantification_fold) {
auto program = loadParams(model_dir + "/__model__");
std::string shell_command = "mkdir " + param_min_path;
......@@ -377,7 +389,7 @@ void quantificate_seperated_float32(const std::string model_dir, const std::stri
FILE *out_file = fopen(file_name.c_str(), "wb");
char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
char *data = origin_data;
LoadWithDumpForFloat32(*var_desc, &data, out_file);
LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold);
delete origin_data;
fclose(out_file);
}
......@@ -402,10 +414,15 @@ int main(int argc, char **argv) {
PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str());
std::string output_path = argv[3];
int quantification_fold = 1;
if (argc > 4) {
quantification_fold = std::stoi(argv[4]);
}
if (action_type == "0") {
// for seperated
const std::string &seperated_min_dir = output_path;
quantificate_seperated_int8(base_path, seperated_min_dir);
quantificate_seperated_int8(base_path, seperated_min_dir, quantification_fold);
return 0;
}
......@@ -414,14 +431,14 @@ int main(int argc, char **argv) {
const std::string &combined_min_dir = output_path;
std::string model_path = base_path + "/model";
std::string param_path = base_path + "/params";
quantificate_combined_int8(model_path, param_path, combined_min_dir);
quantificate_combined_int8(model_path, param_path, combined_min_dir, quantification_fold);
return 0;
}
if (action_type == "2") {
// for seperated
const std::string &seperated_min_dir = output_path;
quantificate_seperated_float32(base_path, seperated_min_dir);
quantificate_seperated_float32(base_path, seperated_min_dir, quantification_fold);
return 0;
}
......@@ -430,7 +447,7 @@ int main(int argc, char **argv) {
const std::string &combined_min_dir = output_path;
std::string model_path = base_path + "/model";
std::string param_path = base_path + "/params";
quantificate_combined_float32(model_path, param_path, combined_min_dir);
quantificate_combined_float32(model_path, param_path, combined_min_dir, quantification_fold);
return 0;
}
......