Unverified commit 4bad9853, authored by Yanzhan Yang, committed by GitHub

1. improve n-fold quantification algorithm by introducing a minimal size for each fold. 2. automatically search for best n for n-fold algorithm. (#2167)
Parent 2dc3b4d7
@@ -178,6 +178,8 @@ static void LoadMemInternal(void **in_data, void *out_data, int64_t size,
   char **data_buf = reinterpret_cast<char **>(in_data);
   T *tensor_data = reinterpret_cast<T *>(out_data);
   if (quant_uint8) {
+    const int minimal_fold_size = 2;
+    quant_fold = fmin(fmax(1, size / minimal_fold_size), quant_fold);
     int step = fmax(size / quant_fold, 1);
     int visited_fold = 0;
     while (visited_fold * step < size) {
......
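A note on the first change: the requested fold count is now clamped so that every fold still covers at least minimal_fold_size values. A minimal standalone sketch of that rule, assuming a hypothetical helper name ClampFoldCount and made-up sizes (only the constant and the min/max clamping follow the hunk above):

#include <algorithm>
#include <cstdio>

// Sketch of the fold-clamping rule: cap the requested fold count so that
// each fold still contains at least minimal_fold_size values.
int ClampFoldCount(int requested_fold, int size) {  // hypothetical helper
  const int minimal_fold_size = 2;                  // same constant as in the diff
  return std::min(std::max(1, size / minimal_fold_size), requested_fold);
}

int main() {
  std::printf("%d\n", ClampFoldCount(100, 5));      // 5 values -> at most 2 folds
  std::printf("%d\n", ClampFoldCount(100, 100000)); // large tensor -> 100 is kept
  return 0;
}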
@@ -17,6 +17,22 @@
 const size_t kSize64 = sizeof(uint64_t);
 const size_t kSize32 = sizeof(uint32_t);
+const int minimal_fold_size = 2;
+float max_entropy = 0.0;
+float entropy(std::vector<uint8_t> &factors) {
+  int n = factors.size();
+  std::vector<int> counts(256);
+  for (uint8_t &factor : factors) {
+    counts[factor]++;
+  }
+  float res = 1.0;
+  float shift = 100000.0;
+  for (int i = 0; i < 256; i++) {
+    res *= (counts[i] + shift) / (n + shift);
+  }
+  return 1.0 / res;
+}
 char *Get_binary_data(const std::string &filename) {
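The new entropy() helper reduces a fold's 256-bin histogram of uint8 factors to a single score: the smoothed bin frequencies (counts[i] + shift) / (n + shift) are multiplied together and the product is inverted, so the result is always at least 1. main() later prints the maximum score over all folds, which the sweep script at the end reads when comparing fold counts. A self-contained sketch of how the score is accumulated, restating the formula from the hunk above with made-up factor data:

#include <cmath>
#include <cstdio>
#include <vector>

// Restatement of the entropy() heuristic above, for illustration only.
float entropy(std::vector<uint8_t> &factors) {
  int n = factors.size();
  std::vector<int> counts(256);
  for (uint8_t &factor : factors) {
    counts[factor]++;
  }
  float res = 1.0;
  float shift = 100000.0;
  for (int i = 0; i < 256; i++) {
    res *= (counts[i] + shift) / (n + shift);
  }
  return 1.0 / res;
}

int main() {
  float max_entropy = 0.0;
  // Two hypothetical folds of quantized factors; max_entropy keeps the largest
  // score seen, mirroring how LoadWithDumpForInt8/Float32 use it per fold.
  std::vector<uint8_t> fold_a, fold_b(4096, 128);
  for (int i = 0; i < 4096; i++) fold_a.push_back(static_cast<uint8_t>(i % 256));
  max_entropy = std::fmax(max_entropy, entropy(fold_a));
  max_entropy = std::fmax(max_entropy, entropy(fold_b));
  std::printf("max entropy : %f\n", max_entropy);
  return 0;
}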
@@ -162,6 +178,7 @@ void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char
   }
   *dataP += tensorSize;
+  quantification_fold = std::min(std::max(1, memory_size / minimal_fold_size), quantification_fold);
   int step = std::max(memory_size / quantification_fold, 1);
   int visited_fold = 0;
@@ -178,11 +195,14 @@ void LoadWithDumpForInt8(const paddle_mobile::framework::VarDesc &var_desc, char
       fwrite(&min_value, sizeof(float), 1, out_file);
       fwrite(&max_value, sizeof(float), 1, out_file);
+      std::vector<uint8_t> factors;
       for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
         float value = static_cast<float *> (memory)[g];
         auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+        factors.push_back(factor);
         fwrite(&factor, sizeof(uint8_t), 1, out_file);
       }
+      max_entropy = fmax(max_entropy, entropy(factors));
       visited_fold++;
     }
   }
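For reference, the per-fold quantization itself is unchanged: each fold is mapped to uint8 with its own min_value/max_value, and the float32 path below applies the inverse mapping to report the average error. A small worked sketch of both formulas, using made-up values and an assumed fold range of [-1, 1]:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // One fold with its own range, as computed per fold in the loops above.
  float fold[4] = {-1.0f, -0.5f, 0.25f, 1.0f};
  float min_value = -1.0f, max_value = 1.0f;
  for (float value : fold) {
    // Same affine mapping to [0, 255] as in the diff.
    auto factor = (uint8_t) std::round((value - min_value) / (max_value - min_value) * 255);
    // Inverse mapping used by the float32 path to measure the error.
    float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
    std::printf("%.2f -> %3u -> %.4f (abs diff %.4f)\n", value, (unsigned)factor,
                value_quantized, std::fabs(value - value_quantized));
  }
  return 0;
}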
@@ -325,6 +345,7 @@ void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, c
   }
   *dataP += tensorSize;
+  quantification_fold = std::min(std::max(1, memory_size / minimal_fold_size), quantification_fold);
   int step = std::max(memory_size / quantification_fold, 1);
   int visited_fold = 0;
@@ -339,13 +360,16 @@ void LoadWithDumpForFloat32(const paddle_mobile::framework::VarDesc &var_desc, c
       }
       float diff = 0.0;
+      std::vector<uint8_t> factors;
       for (int g = visited_fold * step; g < std::min((visited_fold + 1) * step, memory_size); ++g) {
         float value = static_cast<float *> (memory)[g];
         auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+        factors.push_back(factor);
         float value_quantized = min_value + (factor / 255.0) * (max_value - min_value);
         diff += fabs(value - value_quantized);
         fwrite(&value_quantized, sizeof(float), 1, out_file);
       }
+      max_entropy = fmax(max_entropy, entropy(factors));
       if (memory_size > 0) {
         std::cout << "avg diff caused by quantization for var " << var_desc.Name() << " is: " << (diff / memory_size) << std::endl;
       }
@@ -432,6 +456,7 @@ int main(int argc, char **argv) {
     std::string model_path = base_path + "/model";
     std::string param_path = base_path + "/params";
     quantificate_combined_int8(model_path, param_path, combined_min_dir, quantification_fold);
+    std::cout << "max entropy : " << max_entropy << std::endl;
     return 0;
   }
......
This diff has been collapsed.
# -*- coding: utf-8 -*-
import os
import sys
import math
import subprocess
import numpy as np
import paddle.fluid as fluid


def sh(command):
    pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return pipe.stdout.read().decode("utf-8")


for fold in range(100, 1001, 100):
    print("checking fold : {}".format(fold))
    max_entropy = sh("./quantify 1 model params {}".format(fold))
    print("max entropy :", max_entropy, end="")
    sh("rm -rf scripts/model")
    sh("rm -rf scripts/quantification_model")
    sh("cp -r model scripts/model")
    sh("cp -r model scripts/quantification_model")
    sh("mv params scripts/quantification_model")
    diff = sh("cd scripts && python run.py {}".format(fold))
    print("output diff :", diff, end="")