Merge branch 'develop' into uni_benchmark

e327951f · Jiawei Wang · GitHub · edf0f601 · cf81c55c · e327951f
隐藏空白更改
内联并排

Showing with 33 addition and 6 deletion

paddle_inference/paddle/include/paddle_engine.h paddle_inference/paddle/include/paddle_engine.h +33 -6

未找到文件。
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -44,6 +44,18 @@ static const int max_batch = 32;
 static const int min_subgraph_size = 3;
 static PrecisionType precision_type;
+std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
+  auto warmup_data = std::make_shared<std::vector<paddle::PaddleTensor>>(1);
+  paddle::PaddleTensor images;
+  images.name = "image";
+  images.shape = {2, 3, 300, 300};
+  images.dtype = paddle::PaddleDType::FLOAT32;
+  images.data.Resize(sizeof(float) * 2 * 3 * 300 * 300);
+  (*warmup_data)[0] = std::move(images);
+  return warmup_data;
+}
 PrecisionType GetPrecision(const std::string& precision_data) {
  std::string precision_type = predictor::ToLower(precision_data);
  if (precision_type == "fp32") {
@@ -154,6 +166,13 @@ class PaddleInferenceEngine : public EngineCore {
    }
    precision_type = GetPrecision(FLAGS_precision);
+    if (engine_conf.has_enable_ir_optimization() &&
+        !engine_conf.enable_ir_optimization()) {
+      config.SwitchIrOptim(false);
+    } else {
+      config.SwitchIrOptim(true);
+    }
    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
        config.EnableUseGpu(2000, FLAGS_gpuid);
@@ -174,23 +193,31 @@ class PaddleInferenceEngine : public EngineCore {
    if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
        (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
         engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
+#ifdef WITH_MKLML
+#ifdef WITH_MKLDNN
+      config.EnableMKLDNN();
+      config.SwitchIrOptim(true);
+      config.DisableGpu();
+      // config.SetCpuMathLibraryNumThreads(2);
      if (precision_type == PrecisionType::kInt8) {
        config.EnableMkldnnQuantizer();
+        auto quantizer_config = config.mkldnn_quantizer_config();
+        // TODO: warmup data
+        // quantizer_config -> SetWarmupData();
+        // quantizer_config -> SetWarmupBatchSize();
+        // quantizer_config -> SetEnabledOpTypes(4);
      } else if (precision_type == PrecisionType::kHalf) {
        config.EnableMkldnnBfloat16();
      }
+#endif
+#endif
    }
    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
    }
-    if (engine_conf.has_enable_ir_optimization() &&
-        !engine_conf.enable_ir_optimization()) {
-      config.SwitchIrOptim(false);
-    } else {
-      config.SwitchIrOptim(true);
-    }
    if (engine_conf.has_enable_memory_optimization() &&
        engine_conf.enable_memory_optimization()) {