diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 8c3370c4bdf7f7030b580cc5b58907290d071965..262a0378bef5caacbfdf5a3d2b46ed6ce598cb10 100755
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -44,6 +44,18 @@ static const int max_batch = 32;
 static const int min_subgraph_size = 3;
 static PrecisionType precision_type;
 
+std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
+  auto warmup_data = std::make_shared<std::vector<paddle::PaddleTensor>>(1);
+  paddle::PaddleTensor images;
+  images.name = "image";
+  images.shape = {2, 3, 300, 300};
+  images.dtype = paddle::PaddleDType::FLOAT32;
+  images.data.Resize(sizeof(float) * 2 * 3 * 300 * 300);
+
+  (*warmup_data)[0] = std::move(images);
+  return warmup_data;
+}
+
 PrecisionType GetPrecision(const std::string& precision_data) {
   std::string precision_type = predictor::ToLower(precision_data);
   if (precision_type == "fp32") {
@@ -154,6 +166,13 @@ class PaddleInferenceEngine : public EngineCore {
     }
 
     precision_type = GetPrecision(FLAGS_precision);
+    if (engine_conf.has_enable_ir_optimization() &&
+        !engine_conf.enable_ir_optimization()) {
+      config.SwitchIrOptim(false);
+    } else {
+      config.SwitchIrOptim(true);
+    }
+
     if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
       if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
         config.EnableUseGpu(2000, FLAGS_gpuid);
@@ -174,23 +193,31 @@
     if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
         (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
          engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
+#ifdef WITH_MKLML
+#ifdef WITH_MKLDNN
+      config.EnableMKLDNN();
+      config.SwitchIrOptim(true);
+      config.DisableGpu();
+      // config.SetCpuMathLibraryNumThreads(2);
+
       if (precision_type == PrecisionType::kInt8) {
         config.EnableMkldnnQuantizer();
+        auto quantizer_config = config.mkldnn_quantizer_config();
+        // TODO: warmup data
+        // quantizer_config -> SetWarmupData();
+        // quantizer_config -> SetWarmupBatchSize();
+        // quantizer_config -> SetEnabledOpTypes(4);
       } else if (precision_type == PrecisionType::kHalf) {
         config.EnableMkldnnBfloat16();
       }
+#endif
+#endif
     }
 
     if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
       // 2 MB l3 cache
       config.EnableXpu(2 * 1024 * 1024);
     }
-    if (engine_conf.has_enable_ir_optimization() &&
-        !engine_conf.enable_ir_optimization()) {
-      config.SwitchIrOptim(false);
-    } else {
-      config.SwitchIrOptim(true);
-    }
 
     if (engine_conf.has_enable_memory_optimization() &&
         engine_conf.enable_memory_optimization()) {