From d71f1b3fb0053544fd76cd4d5dfd512ed62fbd83 Mon Sep 17 00:00:00 2001
From: yeliang2258 <30516196+yeliang2258@users.noreply.github.com>
Date: Thu, 29 Sep 2022 11:23:31 +0800
Subject: [PATCH] Remove calibration file path when deploy quantize model
 (#46283)

* remove calibration file path

* remove useless code
---
 .../ir/mkldnn/quant_dequant_mkldnn_pass.cc    |  4 --
 paddle/fluid/inference/analysis/argument.h    |  3 --
 .../analysis/passes/ir_analysis_pass.cc       | 47 -------------------
 paddle/fluid/inference/api/analysis_config.cc | 14 ------
 .../fluid/inference/api/analysis_predictor.cc |  1 -
 .../inference/api/paddle_analysis_config.h    | 13 -----
 paddle/fluid/pybind/inference_api.cc          |  3 --
 ...st_onnx_format_quantization_mobilenetv1.py |  3 --
 8 files changed, 88 deletions(-)
 mode change 100644 => 100755 paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
 mode change 100644 => 100755 paddle/fluid/inference/api/analysis_config.cc
 mode change 100755 => 100644 paddle/fluid/inference/api/paddle_analysis_config.h

diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
old mode 100644
new mode 100755
index abe51960183..65c64af4642
--- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
@@ -716,10 +716,6 @@ void QuantDequantMkldnnPass::ApplyImpl(ir::Graph* graph) const {
   std::unordered_map<std::string, std::vector<float>> var_quant_scales{};
   bool onnx_format_quantize_model = false;
   auto* scope = param_scope();
-  GetInfoFromTheFirstOp(
-      graph, "has_quant_info", "var_quant_scales", &var_quant_scales);
-  VLOG(1) << "The nums of scale info from slim txt is: "
-          << var_quant_scales.size();
   MarkSkipQuantizedOps(graph, skip_ops);
   CollectInfoFromFake(graph, scope, fake_dequantize_types, &weight_thresholds);
   CollectWeightScalesInfoFromONNXFormatDequantize(graph,
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 871718eff14..39e844aacb1 100755
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -177,9 +177,6 @@ struct Argument {
   DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);
 
 #ifdef PADDLE_WITH_MKLDNN
-  // Calibration file path of quantize model
-  DECL_ARGUMENT_FIELD(calibration_file_path, CalibrationFilePath, std::string);
-
   // A set of op types to enable their quantized kernels
   DECL_ARGUMENT_FIELD(quantize_enabled_op_types,
                       QuantizeEnabledOpTypes,
diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
index 53398a69536..2b2b0ab5cab 100644
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
@@ -36,19 +36,6 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
   auto* the_graph = argument->ReleaseMainGraph();
   auto graph = std::unique_ptr<Graph>(the_graph);
 
-#ifdef PADDLE_WITH_MKLDNN
-  if (argument->Has("calibration_file_path")) {
-    VLOG(5) << "Calibration file path of quantize model: "
-            << argument->calibration_file_path();
-    std::unordered_map<std::string, std::vector<float>> var_quant_scales{};
-    ReadCalibrationInfo(argument, &var_quant_scales);
-    // save var_quant_scales in the first op's attr
-    // for quant_dequant_mkldnn_pass
-    SaveInfoInTheFirstOp(
-        the_graph, "has_quant_info", "var_quant_scales", var_quant_scales);
-  }
-#endif
-
   // Apply passes.
   IRPassManager the_ir_manager(argument);
   graph = the_ir_manager.Apply(std::move(graph));
@@ -61,40 +48,6 @@ void IrAnalysisPass::RunImpl(Argument* argument) {
   CollectFusionStatis(argument);
 }
 
-void IrAnalysisPass::ReadCalibrationInfo(
-    Argument* argument,
-    std::unordered_map<std::string, std::vector<float>>* var_quant_scales) {
-  std::string calibration_file_path;
-#ifdef PADDLE_WITH_MKLDNN
-  if (argument->Has("calibration_file_path")) {
-    calibration_file_path = argument->calibration_file_path();
-  }
-#endif
-  if (calibration_file_path.empty()) {
-    LOG(INFO) << "argument has no calibration_file_path";
-    return;
-  }
-  std::ifstream calibration_file(calibration_file_path);
-  std::string one_line;
-  while (getline(calibration_file, one_line)) {
-    if (one_line.find(" ") != one_line.npos) {
-      auto pos = one_line.find(" ");
-      std::string pre_str = one_line.substr(0, pos);
-      std::string pos_str = one_line.substr(pos);
-      if (pre_str.size() && pos_str.size()) {
-        std::string tensor_name = pre_str;
-        float scale = std::stod(pos_str);
-        scale = 1.0 / scale;
-        if (std::isinf(scale) || std::isnan(scale)) {
-          continue;
-        }
-        std::vector<float> scales = {scale};
-        (*var_quant_scales)[tensor_name] = scales;
-      }
-    }
-  }
-}
-
 void IrAnalysisPass::CollectFusionStatis(Argument* argument) {
   if (!argument->main_graph().Has(framework::ir::kFuseStatisAttr)) {
     LOG(INFO) << "argument has no fuse statis";
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
old mode 100644
new mode 100755
index 97f6d81e592..f8c53353ca2
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -246,7 +246,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(opt_cache_dir_);
   CP_MEMBER(prog_file_);
   CP_MEMBER(params_file_);
-  CP_MEMBER(calibration_file_path_);
 
   CP_MEMBER(use_fc_padding_);
   // GPU related.
@@ -518,14 +517,6 @@ void AnalysisConfig::EnableMkldnnInt8(
   Update();
 }
 
-void AnalysisConfig::SetCalibrationFilePath(
-    const std::string &calibration_file_path) {
-  calibration_file_path_ = calibration_file_path;
-  VLOG(1) << "Set calibration file path of quantize model: " +
-                 calibration_file_path_;
-  Update();
-}
-
 MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
   PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
                           platform::errors::PreconditionNotMet(
@@ -850,8 +841,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << prog_file_;
   ss << params_file_;
 
-  ss << calibration_file_path_;
-
   ss << use_gpu_;
   ss << use_external_stream_;
   ss << exec_stream_;
@@ -1039,9 +1028,6 @@ std::string AnalysisConfig::Summary() {
     os.InsertRow({"model_file", prog_file_});
     os.InsertRow({"params_file", params_file_});
   }
-  if (!(calibration_file_path_.empty())) {
-    os.InsertRow({"calibration_file_path", calibration_file_path_});
-  }
   if (model_from_memory_) {
     os.InsertRow({"model_from_memory", params_file_});
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 1df1425de24..5805b6a2be3 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1189,7 +1189,6 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetQuantizeEnabledOpTypes(config_.quantize_enabled_op_types_);
     argument_.SetQuantizeExcludedOpIds(config_.quantize_excluded_op_ids_);
     argument_.SetQuantVarScales({});
-    argument_.SetCalibrationFilePath(config_.calibration_file_path_);
   }
 #endif
 
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
old mode 100755
new mode 100644
index 5f75636d854..b92802a7bf0
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -770,18 +770,6 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void EnableMkldnnQuantizer();
 
-  ///
-  /// \brief Set the calibration ranges file path of quantize model.
-  ///
-  ///
-  void SetCalibrationFilePath(const std::string& calibration_file_path = "");
-
-  ///
-  /// \brief Return the calibration ranges file path of quantize model.
-  ///
-  ///
-  std::string CalibrationFilePath() { return calibration_file_path_; }
-
   ///
   /// \brief Turn on MKLDNN int8.
   ///
@@ -960,7 +948,6 @@ struct PD_INFER_DECL AnalysisConfig {
   std::string model_dir_;
   mutable std::string prog_file_;
   mutable std::string params_file_;
-  mutable std::string calibration_file_path_;
 
   // Mixed precision.
   std::unordered_set<std::string> mixed_black_list_;
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 094b78753a7..221d0af5428 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -782,9 +782,6 @@ void BindAnalysisConfig(py::module *m) {
       .def("to_native_config", &AnalysisConfig::ToNativeConfig)
      .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
-      .def("set_calibration_file_path",
-           &AnalysisConfig::SetCalibrationFilePath,
-           py::arg("calibration_file_path") = std::string(""))
 #ifdef PADDLE_WITH_MKLDNN
       .def("quantizer_config",
            &AnalysisConfig::mkldnn_quantizer_config,
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py b/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py
index aa1d35f50c5..82c3338fab5 100755
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py
@@ -210,9 +210,6 @@ class TestPostTrainingQuantization(unittest.TestCase):
         config.set_cpu_math_library_num_threads(1)
         config.disable_glog_info()
         if is_quantized_model:
-            calibration_file_path = os.path.join(model_path,
-                                                 'calibration_table.txt')
-            config.set_calibration_file_path(calibration_file_path)
             config.enable_mkldnn_int8()
         predictor = paddle.inference.create_predictor(config)
--
GitLab
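
After this change, deploying an ONNX-format quantized model on CPU no longer
reads a calibration_table.txt: scale information is recovered from the
quantize/dequantize ops embedded in the program by quant_dequant_mkldnn_pass
(see CollectWeightScalesInfoFromONNXFormatDequantize above). Below is a
minimal sketch of the updated predictor setup, mirroring the test change in
this patch; the model directory and file names are placeholders, substitute
your own exported quantized model:

    import paddle.inference as paddle_infer

    # Hypothetical paths to an exported ONNX-format quantized model.
    config = paddle_infer.Config("quant_model/model.pdmodel",
                                 "quant_model/model.pdiparams")
    config.set_cpu_math_library_num_threads(1)
    config.disable_glog_info()
    # No set_calibration_file_path() call anymore: enable_mkldnn_int8()
    # is enough, since the MKL-DNN pass collects scales from the graph.
    config.enable_mkldnn_int8()
    predictor = paddle_infer.create_predictor(config)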