Unverified commit 7e9bb98a, authored by Yanzhan Yang, committed by GitHub

add n-fold quantification algorithm (#2164)

* 1. add quantification_fold parameter. 2. support quantification test in run.py.

* implement n-fold quantification

Parent 4b9df8fb
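What the patch changes, in short: instead of one (min, max) quantization range per parameter tensor, each tensor is split into `quantification_fold` chunks of roughly `size / quantification_fold` elements, and every chunk is stored as its own float min, float max, and uint8 codes, which the executor then decodes chunk by chunk. The sketch below is a minimal, self-contained illustration of that layout and its round trip; `FoldedChunk`, `QuantizeNFold`, and `DequantizeNFold` are names invented for this sketch (they do not appear in the patch), and the zero-range guard is an extra safety check that the tool itself does not perform.

```cpp
// Illustrative sketch only: per-chunk uint8 quantization in the spirit of the
// n-fold scheme. Each chunk carries its own min/max, so an outlier in one part
// of a tensor no longer widens the quantization range of the whole tensor.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

struct FoldedChunk {  // hypothetical container, not in the patch
  float min_value;
  float max_value;
  std::vector<uint8_t> codes;
};

std::vector<FoldedChunk> QuantizeNFold(const std::vector<float> &src,
                                       int quantification_fold) {
  std::vector<FoldedChunk> chunks;
  const int size = static_cast<int>(src.size());
  const int step = std::max(size / quantification_fold, 1);
  for (int start = 0; start < size; start += step) {
    const int end = std::min(start + step, size);
    FoldedChunk chunk;
    chunk.min_value = *std::min_element(src.begin() + start, src.begin() + end);
    chunk.max_value = *std::max_element(src.begin() + start, src.begin() + end);
    const float range = chunk.max_value - chunk.min_value;
    for (int i = start; i < end; ++i) {
      // guard against a constant chunk (range == 0); the real tool does not
      const float scaled =
          range > 0.f ? (src[i] - chunk.min_value) / range * 255.f : 0.f;
      chunk.codes.push_back(static_cast<uint8_t>(std::round(scaled)));
    }
    chunks.push_back(chunk);
  }
  return chunks;
}

std::vector<float> DequantizeNFold(const std::vector<FoldedChunk> &chunks) {
  std::vector<float> dst;
  for (const FoldedChunk &chunk : chunks) {
    const float factor = (chunk.max_value - chunk.min_value) / 255.f;
    for (uint8_t code : chunk.codes) {
      dst.push_back(code * factor + chunk.min_value);  // mirrors the executor's decode
    }
  }
  return dst;
}
```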
@@ -173,24 +173,33 @@ void Executor<Device, T>::InitFeedFetchList() {
 }

 template <typename T>
-static void LoadMemInternal(void **data, LoDTensor *tensor,
-                            bool quant_uint8 = false) {
-  char **data_buf = reinterpret_cast<char **>(data);
-  int64_t size = tensor->numel();
-  T *tensor_data = tensor->mutable_data<T>();
+static void LoadMemInternal(void **in_data, void *out_data, int64_t size,
+                            bool quant_uint8 = false, int quant_fold = 1) {
+  char **data_buf = reinterpret_cast<char **>(in_data);
+  T *tensor_data = reinterpret_cast<T *>(out_data);
   if (quant_uint8) {
-    // should be moved into operator init function
-    float min_value;
-    float max_value;
-    memory::Copy(&min_value, *data_buf, sizeof(float));
-    memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float));
-    *data_buf += 2 * sizeof(float);
-    const float factor = (max_value - min_value) / 255.0;
-    const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data_buf);
-    for (int k = 0; k < size; ++k) {
-      tensor_data[k] = uint8_data[k] * factor + min_value;
+    int step = fmax(size / quant_fold, 1);
+    int visited_fold = 0;
+    while (visited_fold * step < size) {
+      // should be moved into operator init function
+      float min_value;
+      float max_value;
+      memory::Copy(&min_value, *data_buf, sizeof(float));
+      memory::Copy(&max_value, *data_buf + sizeof(float), sizeof(float));
+      *data_buf += 2 * sizeof(float);
+      const float factor = (max_value - min_value) / 255.0;
+      const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data_buf);
+      int k = 0;
+      for (; k < step; ++k) {
+        int tensor_data_idx = visited_fold * step + k;
+        if (tensor_data_idx >= size) {
+          break;
+        }
+        tensor_data[tensor_data_idx] = uint8_data[k] * factor + min_value;
+      }
+      *data_buf += k * sizeof(uint8_t);
+      visited_fold++;
     }
-    *data_buf += size * sizeof(uint8_t);
   } else {
     memory::Copy(tensor_data, *data_buf, size * sizeof(T));
     *data_buf += size * sizeof(T);
@@ -235,14 +244,20 @@ void Executor<Device, T>::LoadMemory(void **data,
   // parse tensor from stream
   switch (tensor_desc.DataType()) {
     case VARTYPE_TYPE_FP32:
-      LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor,
-                             program_.quantification);
+      LoadMemInternal<float>(
+          reinterpret_cast<void **>(data_buf),
+          reinterpret_cast<void *>(tensor->mutable_data<T>()), tensor->numel(),
+          program_.quantification, program_.quantification_fold);
       break;
     case VARTYPE_TYPE_INT8:
-      LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
+      LoadMemInternal<int8_t>(
+          reinterpret_cast<void **>(data_buf),
+          reinterpret_cast<void *>(tensor->mutable_data<T>()), tensor->numel());
       break;
     case VARTYPE_TYPE_INT32:
-      LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
+      LoadMemInternal<int>(reinterpret_cast<void **>(data_buf),
+                           reinterpret_cast<void *>(tensor->mutable_data<T>()),
+                           tensor->numel());
       break;
     default:
       LOG(kLOG_ERROR) << "data type is not supported";
@@ -944,31 +959,10 @@ void Executor<GPU_CL, float>::LoadMemory(const VarDesc var_desc,
   void *memory = nullptr;
   int type_size = 4;
   memory = tensorInput;
-  if (program_.quantification) {
-    float min_value;
-    float max_value;
-
-    memcpy(&min_value, *data, sizeof(float));
-    memcpy(&max_value, *data + sizeof(float), sizeof(float));
-    *data += 2 * sizeof(float);
-    const float factor = (max_value - min_value) / 255.0;
-    uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
-    for (int k = 0; k < memory_size; ++k) {
-      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
-    }
-    *data += (memory_size * sizeof(uint8_t));
-  } else {
-    for (int n = 0; n < memory_size; n++) {
-      float value;
-      memcpy(&value, *data + n * type_size, type_size);
-      if (value < 1e-30 && value > -1e-30) {
-        static_cast<float *>(memory)[n] = 0.0;
-      } else {
-        static_cast<float *>(memory)[n] = value;
-      }
-    }
-    (*data) += (sizeof(char) * memory_size * type_size);
-  }
+
+  LoadMemInternal<float>(reinterpret_cast<void **>(data),
+                         reinterpret_cast<void *>(memory), memory_size,
+                         program_.quantification, program_.quantification_fold);
 }

 template <>
...
@@ -87,7 +87,8 @@ void Loader<GPU_CL, float>::InitMemoryFromProgram(
 template <>
 const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
     size_t read_size, const uint8_t *buf, size_t combined_params_len,
-    uint8_t *combined_params_buf, bool optimize, bool quantification) {
+    uint8_t *combined_params_buf, bool optimize, bool quantification,
+    int quantification_fold) {
   bool can_add_split = false;

   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
@@ -109,6 +110,7 @@ const Program<GPU_CL, float> Loader<GPU_CL, float>::LoadCombinedMemory(
   program.quantification = quantification;
   program.combined_params_len = combined_params_len;
   program.combined_params_buf = combined_params_buf;
+  program.quantification_fold = quantification_fold;

   auto scope = std::make_shared<Scope>();
   program.scope = scope;
@@ -187,9 +189,11 @@ template <typename Device, typename T>
 const Program<Device, T> Loader<Device, T>::Load(const std::string &dirname,
                                                  bool optimize,
                                                  bool quantification,
-                                                 bool can_add_split) {
-  auto program = this->LoadProgram(dirname + "/__model__", optimize,
-                                   quantification, can_add_split);
+                                                 bool can_add_split,
+                                                 int quantification_fold) {
+  auto program =
+      this->LoadProgram(dirname + "/__model__", optimize, quantification,
+                        can_add_split, quantification_fold);
   program.model_path = dirname;
   return program;
 }
@@ -198,8 +202,10 @@ template <typename Device, typename T>
 const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
                                                  const std::string &para_path,
                                                  bool optimize,
-                                                 bool quantification) {
-  auto program = this->LoadProgram(model_path, optimize, quantification);
+                                                 bool quantification,
+                                                 int quantification_fold) {
+  auto program = this->LoadProgram(model_path, optimize, quantification, false,
+                                   quantification_fold);

   program.para_path = para_path;
   program.combined = true;
@@ -210,7 +216,7 @@ const Program<Device, T> Loader<Device, T>::Load(const std::string &model_path,
 template <typename Device, typename T>
 const Program<Device, T> Loader<Device, T>::LoadProgram(
     const std::string &model_path, bool optimize, bool quantification,
-    bool can_add_split) {
+    bool can_add_split, int quantification_fold) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -232,6 +238,7 @@ const Program<Device, T> Loader<Device, T>::LoadProgram(
   program.quantification = quantification;
   program.combined_params_len = 0;
   program.combined_params_buf = nullptr;
+  program.quantification_fold = quantification_fold;

   auto scope = std::make_shared<Scope>();
   program.scope = scope;
@@ -248,7 +255,8 @@ const Program<Device, T> Loader<Device, T>::LoadProgram(
 template <typename Device, typename T>
 const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
     size_t read_size, const uint8_t *buf, size_t combined_params_len,
-    uint8_t *combined_params_buf, bool optimize, bool quantification) {
+    uint8_t *combined_params_buf, bool optimize, bool quantification,
+    int quantification_fold) {
   bool can_add_split = false;

   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
@@ -270,6 +278,7 @@ const Program<Device, T> Loader<Device, T>::LoadCombinedMemory(
   program.quantification = quantification;
   program.combined_params_len = combined_params_len;
   program.combined_params_buf = combined_params_buf;
+  program.quantification_fold = quantification_fold;

   auto scope = std::make_shared<Scope>();
   program.scope = scope;
...
@@ -32,7 +32,8 @@ class Loader {
   const Program<Device, T> Load(const std::string &dirname,
                                 bool optimize = false,
                                 bool quantification = false,
-                                bool can_add_split = false);
+                                bool can_add_split = false,
+                                int quantification_fold = 1);

   /*
    * @b load combine format fluid mode
@@ -41,20 +42,20 @@ class Loader {
   const Program<Device, T> Load(const std::string &model_path,
                                 const std::string &para_path,
                                 bool optimize = false,
-                                bool quantification = false);
+                                bool quantification = false,
+                                int quantification_fold = 1);

-  const Program<Device, T> LoadCombinedMemory(size_t model_len,
-                                              const uint8_t *model_buf,
-                                              size_t combined_params_len,
-                                              uint8_t *combined_params_buf,
-                                              bool optimize = false,
-                                              bool quantification = false);
+  const Program<Device, T> LoadCombinedMemory(
+      size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
+      uint8_t *combined_params_buf, bool optimize = false,
+      bool quantification = false, int quantification_fold = 1);

  private:
   const Program<Device, T> LoadProgram(const std::string &model_path,
                                        bool optimize = false,
                                        bool quantification = false,
-                                       bool can_add_split = false);
+                                       bool can_add_split = false,
+                                       int quantification_fold = 1);

   void InitMemoryFromProgram(
       const std::shared_ptr<ProgramDesc> &originProgramDesc,
...
@@ -34,6 +34,7 @@ class Program {
   bool quantification = false;
   size_t combined_params_len;
   uint8_t *combined_params_buf;
+  int quantification_fold = 1;
 };

 }  // namespace framework
...
@@ -216,6 +216,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config {
   int batch_size = 1;
   bool optimize = true;
   bool quantification = false;
+  int quantification_fold = 1;
   bool lod_mode = false;
   int thread_num = 1;
   bool load_when_predict = false;
...
@@ -37,7 +37,8 @@ void PaddleMobile<Device, T>::SetThreadNum(int thread_num,
 template <typename Device, typename T>
 PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
                                        bool optimize, bool quantification,
-                                       int batch_size, bool lod_mode) {
+                                       int batch_size, bool lod_mode,
+                                       int quantification_fold) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
@@ -46,8 +47,9 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<framework::Executor<Device, T>>(
-        loader_->Load(dirname, optimize, quantification), config_, batch_size,
-        optimize, lod_mode);
+        loader_->Load(dirname, optimize, quantification, false,
+                      quantification_fold),
+        config_, batch_size, optimize, lod_mode);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -59,7 +61,8 @@ template <typename Device, typename T>
 PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
                                        const std::string &para_path,
                                        bool optimize, bool quantification,
-                                       int batch_size, bool lod_mode) {
+                                       int batch_size, bool lod_mode,
+                                       int quantification_fold) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
@@ -69,8 +72,9 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<framework::Executor<Device, T>>(
-        loader_->Load(model_path, para_path, optimize, quantification), config_,
-        batch_size, optimize, lod_mode);
+        loader_->Load(model_path, para_path, optimize, quantification,
+                      quantification_fold),
+        config_, batch_size, optimize, lod_mode);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -82,11 +86,12 @@ template <typename Device, typename T>
 PMStatus PaddleMobile<Device, T>::Load(const PaddleMobileConfig &config) {
   if (!config.model_dir.empty()) {
     return this->Load(config.model_dir, config.optimize, config.quantification,
-                      config.batch_size, config.lod_mode);
+                      config.batch_size, config.lod_mode,
+                      config.quantification_fold);
   } else if (!config.prog_file.empty() && !config.param_file.empty()) {
     return this->Load(config.prog_file, config.param_file, config.optimize,
-                      config.quantification, config.batch_size,
-                      config.lod_mode);
+                      config.quantification, config.batch_size, config.lod_mode,
+                      config.quantification_fold);
   } else {
     LOG(kLOG_ERROR) << "Failed to load inference model";
     return PMNotInitialized;
@@ -97,7 +102,7 @@ template <typename Device, typename T>
 bool PaddleMobile<Device, T>::LoadCombinedMemory(
     size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
     uint8_t *combined_params_buf, bool optimize, bool quantification,
-    int batch_size, bool lod_mode) {
+    int batch_size, bool lod_mode, int quantification_fold) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<framework::Loader<Device, T>>();
   } else {
@@ -107,7 +112,7 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory(
     executor_ = std::make_shared<framework::Executor<Device, T>>(
         loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
                                     combined_params_buf, optimize,
-                                    quantification),
+                                    quantification, quantification_fold),
         config_, batch_size, optimize, lod_mode);
   } else {
     LOG(kLOG_INFO) << "executor inited";
...
@@ -50,10 +50,11 @@ class PaddleMobile {
   PMStatus Load(const std::string &dirname, const bool optimize = false,
                 const bool quantification = false, const int batch_size = 1,
-                const bool lod_mode = false);
+                const bool lod_mode = false, const int quantification_fold = 1);

   PMStatus Load(const std::string &model_path, const std::string &para_path,
                 const bool optimize = false, const bool quantification = false,
-                const int batch_size = 1, const bool lod_mode = false);
+                const int batch_size = 1, const bool lod_mode = false,
+                const int quantification_fold = 1);

   PMStatus Load(const PaddleMobileConfig &config);
@@ -84,7 +85,7 @@ class PaddleMobile {
                            size_t combined_params_len,
                            uint8_t *combined_params_buf, bool optimize = false,
                            bool quantification = false, int batch_size = 1,
-                           bool lod_mode = false);
+                           bool lod_mode = false, int quantification_fold = 1);

   void SetThreadNum(int thread_num,
                     PowerMode power_mode = PERFORMANCE_PRIORITY);
...
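For orientation, a hedged caller-side sketch of the extended `Load` overload declared above. It mirrors the call made in test-net below; the model paths and the fold value of 1000 are placeholders, the include path and the `paddle_mobile::CPU` device tag are assumptions taken from the repository's test code, and the fold count passed here must match the one used when the params file was produced by the quantification tool.

```cpp
// Hedged usage sketch, not part of the patch.
#include "io/paddle_mobile.h"  // assumed include path

void LoadQuantizedModel() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> paddle_mobile;
  // Positional arguments of the extended overload after the two paths:
  //   optimize, quantification, batch_size, lod_mode, quantification_fold
  if (paddle_mobile.Load("./checked_model/model", "./checked_model/params",
                         true, true, 1, true, 1000)) {
    // model loaded with per-fold dequantization; run prediction as usual
  }
}
```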
@@ -31,6 +31,10 @@ void test(int argc, char *argv[]) {
   arg_index++;
   bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
   arg_index++;
+  bool quantification = std::stoi(argv[arg_index]) == 1;
+  arg_index++;
+  int quantification_fold = std::stoi(argv[arg_index]);
+  arg_index++;
   paddle_mobile::PaddleMobileConfigInternal config;
   config.memory_optimization_level = enable_memory_optimization
                                          ? MemoryOptimizationWithoutFeeds
@@ -98,7 +102,7 @@ void test(int argc, char *argv[]) {
   auto time1 = time();
   if (paddle_mobile.Load("./checked_model/model", "./checked_model/params",
-                         fuse, false, 1, true)) {
+                         fuse, quantification, 1, true, quantification_fold)) {
     auto time2 = time();
     std::cout << "auto-test"
               << " load-time-cost :" << time_diff(time1, time2) << "ms"
...
@@ -58,7 +58,7 @@ void test(int argc, char *argv[]) {
   auto time1 = time();
   if (paddle_mobile.Load("./checked_model/model", "./checked_model/params",
-                         fuse, false, 1, true)) {
+                         fuse, false, 1, true, 1)) {
     auto time2 = time();
     std::cout << "auto-test"
               << " load-time-cost :" << time_diff(time1, time2) << "ms"
...
@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
 for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
     grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
-    grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
+    grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "^mobile/tools/quantification"); do
     cpplint $file;
     TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
 done
...
@@ -22,6 +22,8 @@ checked_encrypt_model_path = "checked_encrypt_model"
 output_var_filter = []
 output_key_filter = {}
 check_shape = False
+quantification = False
+quantification_fold = 1000

 architecture = "arm-v7a"
 # architecture = "arm-v8a"
@@ -107,7 +109,8 @@ def resave_model(feed_kv):
     for name in p_names:
         v = fluid.framework._get_var(name, prog)
         v.persistable = False
-    fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params")
+    if not quantification:
+        fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params")
     if has_found_wrong_shape:
         pp_red("has found wrong shape", 1)
     else:
@@ -392,7 +395,7 @@ for op in ops:
 pp_tab("op types : {}".format(op_types), 1)

 def check_mobile_results(args, fuse, mem_opt):
-    args = "{} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", args)
+    args = "{} {} {} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", "1" if quantification else "0", quantification_fold, args)
     res = sh("adb shell \"cd {} && export LD_LIBRARY_PATH=. && ./test-net {}\"".format(mobile_exec_root, args))
     lines = res.split("\n")
     # for line in lines:
@@ -425,6 +428,26 @@ def check_mobile_results(args, fuse, mem_opt):
     fetch_names = []
     for fetch in fetches:
         fetch_names.append(fetch.name)
+    fetch_diff = 0.0
+    fetch_count = 0
+    for index in op_cache:
+        op_output_var_name, op = op_cache[index]
+        if not op_output_var_name in output_var_cache:
+            continue
+        if not op_output_var_name in mobile_var_cache:
+            continue
+        if op_output_var_name not in fetch_names:
+            continue
+        values1 = output_var_cache[op_output_var_name]
+        values2 = mobile_var_cache[op_output_var_name]
+        shape = get_var_shape(op_output_var_name) if check_shape else []
+        for i in range(len(values1)):
+            v1 = values1[i]
+            v2 = values2[len(shape) + i]
+            fetch_diff += abs(v1 - v2)
+            fetch_count += 1
+    if fetch_count != 0:
+        pp_yellow("output avg diff : {}".format(fetch_diff / fetch_count), 1)
     for index in op_cache:
         op_output_var_name, op = op_cache[index]
         if mem_opt:
...
@@ -68,7 +68,7 @@ std::shared_ptr<ProgramDesc> loadParams(const std::string &model_path) {
 }

-void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
+void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) {
     // 1. version
     uint32_t version = *reinterpret_cast<uint32_t *>(*dataP);

@@ -162,27 +162,33 @@ void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
     }
     *dataP += tensorSize;

-    // for float 32
-    float min_value = std::numeric_limits<float>::max();
-    float max_value = std::numeric_limits<float>::min();
-    for (int k = 0; k < memory_size; ++k) {
-        min_value = std::min(min_value, static_cast<float *> (memory)[k]);
-        max_value = std::max(max_value, static_cast<float *> (memory)[k]);
-    }
+    int step = std::max(memory_size / quantification_fold, 1);
+
+    int visited_fold = 0;
+    while (visited_fold * step < memory_size) {
+        // for float 32
+        float min_value = std::numeric_limits<float>::max();
+        float max_value = std::numeric_limits<float>::min();
+        for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) {
+            min_value = std::min(min_value, static_cast<float *> (memory)[k]);
+            max_value = std::max(max_value, static_cast<float *> (memory)[k]);
+        }

     fwrite(&min_value, sizeof(float), 1, out_file);
     fwrite(&max_value, sizeof(float), 1, out_file);
-    for (int g = 0; g < memory_size; ++g) {
+    for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
         float value = static_cast<float *> (memory)[g];
         auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
         fwrite(&factor, sizeof(uint8_t), 1, out_file);
+        }
+        visited_fold++;
     }
 }

 void
-quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
+quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path, int quantification_fold) {
     auto program = loadParams(model_path);
     char *origin_data = Get_binary_data(param_path);
     char *data = origin_data;
@@ -193,7 +199,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
                 if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
                     continue;
                 }
-                LoadWithDumpForInt8(*var_desc, &data, out_file);
+                LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold);
             }
         }
     }
@@ -201,7 +207,7 @@ quantificate_combined_int8(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
     delete origin_data;
 }

-void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path) {
+void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path, int quantification_fold) {
     auto program = loadParams(model_dir + "/__model__");
     std::string shell_command = "mkdir " + param_min_path;
@@ -217,7 +223,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path) {
                 FILE *out_file = fopen(file_name.c_str(), "wb");
                 char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
                 char *data = origin_data;
-                LoadWithDumpForInt8(*var_desc, &data, out_file);
+                LoadWithDumpForInt8(*var_desc, &data, out_file, quantification_fold);
                 delete origin_data;
                 fclose(out_file);
             }
@@ -225,7 +231,7 @@ void quantificate_seperated_int8(const std::string model_dir, const std::string param_min_path) {
     }
 }

-void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
+void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file, int quantification_fold) {
     // 1. version
     uint32_t version = *reinterpret_cast<uint32_t *>(*dataP);
@@ -319,30 +325,36 @@ void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, char **dataP, FILE *out_file) {
     }
     *dataP += tensorSize;

-    // for float 32
-    float min_value = std::numeric_limits<float>::max();
-    float max_value = std::numeric_limits<float>::min();
-    for (int k = 0; k < memory_size; ++k) {
-        min_value = std::min(min_value, static_cast<float *> (memory)[k]);
-        max_value = std::max(max_value, static_cast<float *> (memory)[k]);
-    }
-
-    float diff = 0.0;
-    for (int g = 0; g < memory_size; ++g) {
-        float value = static_cast<float *> (memory)[g];
-        auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
-        float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
-        diff += fabs(value - value_quantized);
-        fwrite(&value_quantized, sizeof(float), 1, out_file);
-    }
-    if (memory_size > 0) {
-        std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl;
+    int step = std::max(memory_size / quantification_fold, 1);
+
+    int visited_fold = 0;
+    while (visited_fold * step < memory_size) {
+        // for float 32
+        float min_value = std::numeric_limits<float>::max();
+        float max_value = std::numeric_limits<float>::min();
+        for (int k = visited_fold * step; k < std::min((visited_fold + 1) * step, memory_size); ++k) {
+            min_value = std::min(min_value, static_cast<float *> (memory)[k]);
+            max_value = std::max(max_value, static_cast<float *> (memory)[k]);
+        }
+
+        float diff = 0.0;
+        for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
+            float value = static_cast<float *> (memory)[g];
+            auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+            float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
+            diff += fabs(value - value_quantized);
+            fwrite(&value_quantized, sizeof(float), 1, out_file);
+        }
+        if (memory_size > 0) {
+            std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl;
+        }
+        visited_fold++;
     }
 }

 void
-quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
+quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path, int quantification_fold) {
     auto program = loadParams(model_path);
     char *origin_data = Get_binary_data(param_path);
     char *data = origin_data;
@@ -353,7 +365,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
                 if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
                     continue;
                 }
-                LoadWithDumpForFloat32(*var_desc, &data, out_file);
+                LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold);
             }
         }
     }
@@ -361,7 +373,7 @@ quantificate_combined_float32(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
     delete origin_data;
 }

-void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path) {
+void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path, int quantification_fold) {
     auto program = loadParams(model_dir + "/__model__");
     std::string shell_command = "mkdir " + param_min_path;
@@ -377,7 +389,7 @@ void quantificate_seperated_float32(const std::string model_dir, const std::string param_min_path) {
                 FILE *out_file = fopen(file_name.c_str(), "wb");
                 char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
                 char *data = origin_data;
-                LoadWithDumpForFloat32(*var_desc, &data, out_file);
+                LoadWithDumpForFloat32(*var_desc, &data, out_file, quantification_fold);
                 delete origin_data;
                 fclose(out_file);
             }
@@ -402,10 +414,15 @@ int main(int argc, char **argv) {
     PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str());
     std::string output_path = argv[3];

+    int quantification_fold = 1;
+    if (argc > 4) {
+        quantification_fold = std::stoi(argv[4]);
+    }
+
     if (action_type == "0") {
         // for seperated
         const std::string &seperated_min_dir = output_path;
-        quantificate_seperated_int8(base_path, seperated_min_dir);
+        quantificate_seperated_int8(base_path, seperated_min_dir, quantification_fold);
         return 0;
     }
@@ -414,14 +431,14 @@ int main(int argc, char **argv) {
         const std::string &combined_min_dir = output_path;
         std::string model_path = base_path + "/model";
         std::string param_path = base_path + "/params";
-        quantificate_combined_int8(model_path, param_path, combined_min_dir);
+        quantificate_combined_int8(model_path, param_path, combined_min_dir, quantification_fold);
         return 0;
     }

     if (action_type == "2") {
         // for seperated
         const std::string &seperated_min_dir = output_path;
-        quantificate_seperated_float32(base_path, seperated_min_dir);
+        quantificate_seperated_float32(base_path, seperated_min_dir, quantification_fold);
         return 0;
     }
@@ -430,7 +447,7 @@ int main(int argc, char **argv) {
         const std::string &combined_min_dir = output_path;
         std::string model_path = base_path + "/model";
         std::string param_path = base_path + "/params";
-        quantificate_combined_float32(model_path, param_path, combined_min_dir);
+        quantificate_combined_float32(model_path, param_path, combined_min_dir, quantification_fold);
         return 0;
     }
...