[MKLDNN] Develop fake data demo (#943)

f29efe4b · lidanqing · GitHub · 041d6211 · f29efe4b · f29efe4b
4 changed files
--- a/demo/mkldnn_quant/CMakeLists.txt
+++ b/demo/mkldnn_quant/CMakeLists.txt
@@ -10,6 +10,7 @@ if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
 endif()
 set(DEMO_NAME sample_tester)
+set(DEMO_NAME_FAKE_DATA sample_tester_fake_data)
 if(NOT DEFINED DEMO_NAME)
  message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
 endif()
@@ -31,6 +32,7 @@ link_directories("${PADDLE_LIB}/third_party/install/cryptopp/lib")
 link_directories("${PADDLE_LIB}/third_party/install/utf8proc/lib")

 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
+add_executable(${DEMO_NAME_FAKE_DATA} ${DEMO_NAME_FAKE_DATA}.cc)

 if(WITH_MKL)
  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
@@ -61,3 +63,4 @@ set(DEPS ${DEPS}
    ${EXTERNAL_LIB})

 target_link_libraries(${DEMO_NAME} ${DEPS})
+target_link_libraries(${DEMO_NAME_FAKE_DATA} ${DEPS})
--- a/demo/mkldnn_quant/README.md
+++ b/demo/mkldnn_quant/README.md
@@ -104,6 +104,7 @@ val/ILSVRC2012_val_00000002.jpg 0
 ```

 注意：
+- 为了方便测试，你可以直接下载我们上传的、包含100张图片的二进制文件来验证精度：`wget http://paddle-inference-dist.bj.bcebos.com/int8/imagenet_val_100_tail.tar.gz`
 - 为什么将数据集转化为二进制文件？因为paddle中的数据预处理（resize, crop等）都使用Python的Image模块进行，训练出的模型也是基于Python预处理的图片，但是我们发现Python测试性能开销很大，导致预测性能下降。为了获得良好性能，在量化模型预测阶段，我们决定使用C++测试，而C++只支持OpenCV等库，Paddle不建议使用外部库，因此我们使用Python将图片预处理然后放入二进制文件，再在C++测试中读出。用户根据自己的需要，可以更改C++测试以直接读数据并预处理，精度不会有太大下降。我们还提供了Python测试`sample_tester.py`作为参考，与C++测试`sample_tester.cc`相比，用户可以看到Python测试更大的性能开销。

 ### 4.2 部署预测

--- a/demo/mkldnn_quant/run_dummy.sh
+++ b/demo/mkldnn_quant/run_dummy.sh
+#!/bin/bash
+# Runs the fake-data benchmark binary ./build/sample_tester_fake_data.
+#
+# Usage:
+#   run_dummy.sh MODEL_DIR [num_threads] [batch_size] [with_accuracy_layer] \
+#                [with_analysis] [enable_mkldnn_bfloat16]
+#
+# Positional arguments after MODEL_DIR are optional; each falls back to the
+# default_* value defined next to it below.
+if [ -z "$1" ]; then
+    echo "Usage: $0 MODEL_DIR [num_threads] [batch_size] [with_accuracy_layer] [with_analysis] [enable_mkldnn_bfloat16]" >&2
+    exit 1
+fi
+MODEL_DIR=$1
+default_num_threads=1
+num_threads=${2:-$default_num_threads}
+default_batch_size=1
+# The '$' matters: without it the fallback is the literal text
+# "default_batch_size", which is not a valid --batch_size value.
+batch_size=${3:-$default_batch_size}
+default_with_accuracy=false
+with_accuracy_layer=${4:-$default_with_accuracy}
+default_with_analysis=true
+with_analysis=${5:-$default_with_analysis}
+default_enable_mkldnn_bfloat16=false
+with_mkldnn_bfloat16=${6:-$default_enable_mkldnn_bfloat16}
+# Always passed as 0 to --iterations.
+ITERATIONS=0
+
+GLOG_logtostderr=1 ./build/sample_tester_fake_data \
+    --infer_model="${MODEL_DIR}" \
+    --batch_size="${batch_size}" \
+    --num_threads="${num_threads}" \
+    --iterations="${ITERATIONS}" \
+    --with_accuracy_layer="${with_accuracy_layer}" \
+    --use_analysis="${with_analysis}" \
+    --enable_mkldnn_bfloat16="${with_mkldnn_bfloat16}"
+
+# Example invocation:
+# KMP_BLOCKTIME=1 KMP_SETTINGS=1 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 numactl bash run_dummy.sh INT8 1 1 false false false
--- a/demo/mkldnn_quant/sample_tester_fake_data.cc
+++ b/demo/mkldnn_quant/sample_tester_fake_data.cc
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <paddle_inference_api.h>
+#include <algorithm>
+#include <chrono>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Command-line flags (gflags). Each DEFINE_* below creates a FLAGS_<name>
+// global that main() reads after gflags::ParseCommandLineFlags().
+DEFINE_string(infer_model, "", "path to the model");
+// NOTE: not read anywhere in this file (main() generates its own input);
+// kept so command lines that already pass it keep parsing.
+DEFINE_string(infer_data, "", "path to the input data");
+DEFINE_int32(batch_size, 1, "inference batch size");
+// This demo has no dataset, so 0 cannot mean "whole dataset": main() replaces
+// 0 with a small default computed from batch_size.
+DEFINE_int32(iterations,
+             0,
+             "number of batches to process. 0 means a default derived from "
+             "batch_size");
+DEFINE_int32(num_threads, 1, "num of threads to run in parallel");
+DEFINE_bool(with_accuracy_layer,
+            true,
+            "Set with_accuracy_layer to true if provided model has accuracy "
+            "layer and requires label input");
+DEFINE_bool(use_analysis,
+            false,
+            "If use_analysis is set to true, the model will be optimized");
+DEFINE_int32(warmup_iter, 2, "number of warmup batches");
+DEFINE_bool(enable_mkldnn_bfloat16,
+            false,
+            "If enable_mkldnn_bfloat16 is set to true, will start bf16 "
+            "inference");
+
+/// Minimal stopwatch: tic() records a start point and toc() returns the
+/// number of milliseconds elapsed since the most recent tic().
+struct Timer {
+  // steady_clock is monotonic, so a measured interval cannot be distorted by
+  // wall-clock adjustments; high_resolution_clock gives no such guarantee.
+  using Clock = std::chrono::steady_clock;
+
+  Clock::time_point start;   // written by tic()
+  Clock::time_point startu;  // written by toc(): instant of the latest reading
+
+  /// Starts (or restarts) the measurement.
+  void tic() { start = Clock::now(); }
+
+  /// Returns the milliseconds elapsed since the last tic(), as a double.
+  /// Does not reset the start point, so repeated calls keep growing.
+  double toc() {
+    startu = Clock::now();
+    const std::chrono::duration<double, std::milli> elapsed_ms = startu - start;
+    return elapsed_ms.count();
+  }
+};
+
+/// Benchmarks a Paddle inference model on synthetic input.
+///
+/// Flow: parse flags -> build the predictor -> feed one zero-filled
+/// batch_size x 3 x 224 x 224 float tensor (plus a constant int64 label tensor
+/// when --with_accuracy_layer is set) -> run the warmup batches -> time the
+/// measured batches -> print batch latency, sample latency and FPS.
+///
+/// Returns 0 on success, 1 when the flags or the model inputs are unusable.
+int main(int argc, char *argv[]) {
+  google::InitGoogleLogging(*argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  // batch_size is used as a divisor and as a tensor dimension below, so it
+  // has to be strictly positive.
+  const int batch_size = FLAGS_batch_size;
+  if (batch_size <= 0) {
+    LOG(ERROR) << "batch_size must be positive, got " << batch_size;
+    return 1;
+  }
+
+  paddle::AnalysisConfig cfg;
+  cfg.SetModel(FLAGS_infer_model);
+  cfg.SetCpuMathLibraryNumThreads(FLAGS_num_threads);
+  if (FLAGS_use_analysis) {
+    cfg.DisableGpu();
+    cfg.SwitchIrOptim();
+    cfg.EnableMKLDNN();
+    if (FLAGS_enable_mkldnn_bfloat16) {
+      cfg.EnableMkldnnBfloat16();
+    }
+  }
+  cfg.SwitchUseFeedFetchOps(false);
+
+  auto predictor = paddle::CreatePaddlePredictor(cfg);
+  std::cout << "Batch size " << batch_size << std::endl;
+
+  const int channels = 3;
+  const int height = 224;
+  const int width = 224;
+  const std::vector<int> input_shape = {batch_size, channels, height, width};
+  // Count elements in 64 bits: batch_size * 3 * 224 * 224 overflows int for
+  // large batch sizes.
+  const int64_t nums =
+      static_cast<int64_t>(batch_size) * channels * height * width;
+  // Vectors own the host buffers, so nothing leaks on any return path.
+  const std::vector<float> input(static_cast<size_t>(nums), 0.0f);
+
+  const auto input_names = predictor->GetInputNames();
+  if (input_names.empty()) {
+    LOG(ERROR) << "the model exposes no input tensor";
+    return 1;
+  }
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape(input_shape);
+  input_t->copy_from_cpu<float>(input.data());
+
+  if (FLAGS_with_accuracy_layer) {
+    // The label is fed through the second model input.
+    if (input_names.size() < 2) {
+      LOG(ERROR) << "with_accuracy_layer is set but the model has only "
+                 << input_names.size() << " input(s)";
+      return 1;
+    }
+    // Every sample gets the same fake label value.
+    const std::vector<int64_t> label(
+        static_cast<size_t>(batch_size),
+        static_cast<int64_t>(batch_size % 10 + 1));
+    auto input_l = predictor->GetInputTensor(input_names[1]);
+    input_l->Reshape({batch_size, 1});
+    input_l->copy_from_cpu<int64_t>(label.data());
+  }
+
+  for (int iter = 0; iter < FLAGS_warmup_iter; ++iter) {
+    predictor->ZeroCopyRun();
+    LOG(INFO) << "Warmup " << iter << " batches";
+  }
+
+  // A non-positive --iterations selects the default of 10 / batch_size
+  // batches, clamped to at least 1 so the latency division below is never a
+  // division by zero (10 / batch_size is 0 for batch_size > 10).
+  const int iterations = (FLAGS_iterations > 0)
+                             ? FLAGS_iterations
+                             : std::max(1, 10 / batch_size);
+
+  // Only the inference calls sit between tic() and toc(); logging happens
+  // afterwards so it does not inflate the measurement.
+  Timer run_timer;
+  run_timer.tic();
+  for (int iter = 0; iter < iterations; ++iter) {
+    predictor->ZeroCopyRun();
+  }
+  const double elapsed_time = run_timer.toc();
+
+  LOG(INFO) << "Iterations executed are " << iterations;
+  const double batch_latency = elapsed_time / iterations;
+  const double sample_latency = batch_latency / batch_size;
+  // Latencies are in milliseconds, so FPS = 1000 / per-sample latency.
+  std::cout << "Batch_latency: " << batch_latency << std::endl;
+  std::cout << "Sample_latency: " << sample_latency << std::endl;
+  std::cout << "FPS: " << 1000.f / sample_latency << std::endl;
+  return 0;
+}