Unverified commit d71f1b3f authored by yeliang2258, committed by GitHub

Remove calibration file path when deploying quantized models (#46283)

* remove calibration file path

* remove useless code
Parent 1ef1cace
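For context, the user-visible effect of this change: deploying a quantized model through the MKLDNN int8 path no longer requires pointing the config at an external calibration table, since the scales are collected from the model's own quantize/dequantize ops. A minimal C++ deployment sketch, assuming an illustrative model directory and the public `paddle_inference_api.h` header; the commented-out call is the API this commit removes:

```cpp
#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./quantized_model");  // hypothetical model directory
  config.EnableMKLDNN();
  // Before this commit, a quantized model also needed a slim-generated
  // calibration table:
  //   config.SetCalibrationFilePath("./quantized_model/calibration_table.txt");
  // After it, enabling the int8 kernels is enough:
  config.EnableMkldnnInt8();
  auto predictor = paddle::CreatePaddlePredictor(config);
  return 0;
}
```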
@@ -716,10 +716,6 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const {
   std::unordered_map<std::string, std::vector<float>> var_quant_scales{};
   bool onnx_format_quantize_model = false;
   auto* scope = param_scope();
-  GetInfoFromTheFirstOp(
-      graph, "has_quant_info", "var_quant_scales", &var_quant_scales);
-  VLOG(1) << "The nums of scale info from slim txt is: "
-          << var_quant_scales.size();
   MarkSkipQuantizedOps(graph, skip_ops);
   CollectInfoFromFake(graph, scope, fake_dequantize_types, &weight_thresholds);
   CollectWeightScalesInfoFromONNXFormatDequantize(graph,
......
@@ -177,9 +177,6 @@ struct Argument {
   DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);
 #ifdef PADDLE_WITH_MKLDNN
-  // Calibration file path of quantize model
-  DECL_ARGUMENT_FIELD(calibration_file_path, CalibrationFilePath, std::string);
   // A set of op types to enable their quantized kernels
   DECL_ARGUMENT_FIELD(quantize_enabled_op_types,
                       QuantizeEnabledOpTypes,
......
@@ -36,19 +36,6 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
   auto* the_graph = argument->ReleaseMainGraph();
   auto graph = std::unique_ptr<Graph>(the_graph);
-#ifdef PADDLE_WITH_MKLDNN
-  if (argument->Has("calibration_file_path")) {
-    VLOG(5) << "Calibration file path of quantize model: "
-            << argument->calibration_file_path();
-    std::unordered_map<std::string, std::vector<float>> var_quant_scales{};
-    ReadCalibrationInfo(argument, &var_quant_scales);
-    // save var_quant_scales in the first op's attr
-    // for quant_dequant_mkldnn_pass
-    SaveInfoInTheFirstOp(
-        the_graph, "has_quant_info", "var_quant_scales", var_quant_scales);
-  }
-#endif
   // Apply passes.
   IRPassManager the_ir_manager(argument);
   graph = the_ir_manager.Apply(std::move(graph));
@@ -61,40 +48,6 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
   CollectFusionStatis(argument);
 }
-void IrAnalysisPass::ReadCalibrationInfo(
-    Argument* argument,
-    std::unordered_map<std::string, std::vector<float>>* var_quant_scales) {
-  std::string calibration_file_path;
-#ifdef PADDLE_WITH_MKLDNN
-  if (argument->Has("calibration_file_path")) {
-    calibration_file_path = argument->calibration_file_path();
-  }
-#endif
-  if (calibration_file_path.empty()) {
-    LOG(INFO) << "argument has no calibration_file_path";
-    return;
-  }
-  std::ifstream calibration_file(calibration_file_path);
-  std::string one_line;
-  while (getline(calibration_file, one_line)) {
-    if (one_line.find(" ") != one_line.npos) {
-      auto pos = one_line.find(" ");
-      std::string pre_str = one_line.substr(0, pos);
-      std::string pos_str = one_line.substr(pos);
-      if (pre_str.size() && pos_str.size()) {
-        std::string tensor_name = pre_str;
-        float scale = std::stod(pos_str);
-        scale = 1.0 / scale;
-        if (std::isinf(scale) || std::isnan(scale)) {
-          continue;
-        }
-        std::vector<float> scales = {scale};
-        (*var_quant_scales)[tensor_name] = scales;
-      }
-    }
-  }
-}
 void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
   if (!argument->main_graph().Has(framework::ir::kFuseStatisAttr)) {
     LOG(INFO) << "argument has no fuse statis";
......
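The `ReadCalibrationInfo` method deleted above was the only reader of the slim calibration table, so for anyone migrating an old table: each line holds a tensor name and a calibration range separated by a space, and the stored scale is the reciprocal of that range, with inf/nan results skipped. A self-contained sketch of that parsing logic, written as a hypothetical free function mirroring the removed code (`std::stof` stands in for the original `std::stod`):

```cpp
#include <cmath>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>

// Parse a slim calibration table: one "tensor_name range" pair per line.
std::unordered_map<std::string, std::vector<float>> ReadCalibrationTable(
    const std::string& path) {
  std::unordered_map<std::string, std::vector<float>> var_quant_scales;
  std::ifstream table(path);
  std::string line;
  while (std::getline(table, line)) {
    auto pos = line.find(' ');
    if (pos == std::string::npos) continue;  // not a "name range" line
    std::string name = line.substr(0, pos);
    std::string range = line.substr(pos);  // stof skips the leading space
    if (name.empty() || range.empty()) continue;
    float scale = 1.0f / std::stof(range);
    if (std::isinf(scale) || std::isnan(scale)) continue;  // e.g. range == 0
    var_quant_scales[name] = {scale};
  }
  return var_quant_scales;
}
```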
@@ -246,7 +246,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(opt_cache_dir_);
   CP_MEMBER(prog_file_);
   CP_MEMBER(params_file_);
-  CP_MEMBER(calibration_file_path_);
   CP_MEMBER(use_fc_padding_);
   // GPU related.
@@ -518,14 +517,6 @@ void AnalysisConfig::EnableMkldnnInt8(
   Update();
 }
-void AnalysisConfig::SetCalibrationFilePath(
-    const std::string &calibration_file_path) {
-  calibration_file_path_ = calibration_file_path;
-  VLOG(1) << "Set calibration file path of quantize model: " +
-                 calibration_file_path_;
-  Update();
-}
 MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
   PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
                           platform::errors::PreconditionNotMet(
@@ -850,8 +841,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << prog_file_;
   ss << params_file_;
-  ss << calibration_file_path_;
   ss << use_gpu_;
   ss << use_external_stream_;
   ss << exec_stream_;
@@ -1039,9 +1028,6 @@ std::string AnalysisConfig::Summary() {
     os.InsertRow({"model_file", prog_file_});
     os.InsertRow({"params_file", params_file_});
   }
-  if (!(calibration_file_path_.empty())) {
-    os.InsertRow({"calibration_file_path", calibration_file_path_});
-  }
   if (model_from_memory_) {
     os.InsertRow({"model_from_memory", params_file_});
......
@@ -1189,7 +1189,6 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetQuantizeEnabledOpTypes(config_.quantize_enabled_op_types_);
     argument_.SetQuantizeExcludedOpIds(config_.quantize_excluded_op_ids_);
     argument_.SetQuantVarScales({});
-    argument_.SetCalibrationFilePath(config_.calibration_file_path_);
   }
 #endif
......
@@ -770,18 +770,6 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void EnableMkldnnQuantizer();
-  ///
-  /// \brief Set the calibration ranges file path of quantize model.
-  ///
-  ///
-  void SetCalibrationFilePath(const std::string& calibration_file_path = "");
-
-  ///
-  /// \brief Return the calibration ranges file path of quantize model.
-  ///
-  ///
-  std::string CalibrationFilePath() { return calibration_file_path_; }
-
   ///
   /// \brief Turn on MKLDNN int8.
   ///
@@ -960,7 +948,6 @@ struct PD_INFER_DECL AnalysisConfig {
   std::string model_dir_;
   mutable std::string prog_file_;
   mutable std::string params_file_;
-  mutable std::string calibration_file_path_;
   // Mixed precision.
   std::unordered_set<std::string> mixed_black_list_;
......
@@ -782,9 +782,6 @@ void BindAnalysisConfig(py::module *m) {
       .def("to_native_config", &AnalysisConfig::ToNativeConfig)
       .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
       .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
-      .def("set_calibration_file_path",
-           &AnalysisConfig::SetCalibrationFilePath,
-           py::arg("calibration_file_path") = std::string(""))
 #ifdef PADDLE_WITH_MKLDNN
       .def("quantizer_config",
            &AnalysisConfig::mkldnn_quantizer_config,
......
@@ -210,9 +210,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
         config.set_cpu_math_library_num_threads(1)
         config.disable_glog_info()
         if is_quantized_model:
-            calibration_file_path = os.path.join(model_path,
-                                                 'calibration_table.txt')
-            config.set_calibration_file_path(calibration_file_path)
             config.enable_mkldnn_int8()
         predictor = paddle.inference.create_predictor(config)
......