From f29efe4b93a5f31349c14e0922d7c8f3b8d5093f Mon Sep 17 00:00:00 2001
From: lidanqing
Date: Thu, 23 Jun 2022 20:30:09 +0800
Subject: [PATCH] [MKLDNN] Develop fake data demo (#943)

---
 demo/mkldnn_quant/CMakeLists.txt             |   3 +
 demo/mkldnn_quant/README.md                  |   1 +
 demo/mkldnn_quant/run_dummy.sh               |  24 ++++
 demo/mkldnn_quant/sample_tester_fake_data.cc | 121 +++++++++++++++++++
 4 files changed, 149 insertions(+)
 create mode 100644 demo/mkldnn_quant/run_dummy.sh
 create mode 100644 demo/mkldnn_quant/sample_tester_fake_data.cc

diff --git a/demo/mkldnn_quant/CMakeLists.txt b/demo/mkldnn_quant/CMakeLists.txt
index f288231e..bec142a3 100644
--- a/demo/mkldnn_quant/CMakeLists.txt
+++ b/demo/mkldnn_quant/CMakeLists.txt
@@ -10,6 +10,7 @@ if(NOT DEFINED PADDLE_LIB)
   message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
 endif()
 set(DEMO_NAME sample_tester)
+set(DEMO_NAME_FAKE_DATA sample_tester_fake_data)
 if(NOT DEFINED DEMO_NAME)
   message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
 endif()
@@ -31,6 +32,7 @@ link_directories("${PADDLE_LIB}/third_party/install/cryptopp/lib")
 link_directories("${PADDLE_LIB}/third_party/install/utf8proc/lib")

 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
+add_executable(${DEMO_NAME_FAKE_DATA} ${DEMO_NAME_FAKE_DATA}.cc)

 if(WITH_MKL)
   include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
@@ -61,3 +63,4 @@ set(DEPS ${DEPS} ${EXTERNAL_LIB})

 target_link_libraries(${DEMO_NAME} ${DEPS})
+target_link_libraries(${DEMO_NAME_FAKE_DATA} ${DEPS})
diff --git a/demo/mkldnn_quant/README.md b/demo/mkldnn_quant/README.md
index 2e66ab21..7a82d9c7 100644
--- a/demo/mkldnn_quant/README.md
+++ b/demo/mkldnn_quant/README.md
@@ -104,6 +104,7 @@ val/ILSVRC2012_val_00000002.jpg 0
 ```

 Notes:
+- For easier testing, you can download the 100-image binary dataset we uploaded and use it to verify accuracy directly: `wget http://paddle-inference-dist.bj.bcebos.com/int8/imagenet_val_100_tail.tar.gz`
 - Why convert the dataset to a binary file? Data preprocessing in Paddle (resize, crop, etc.) is done with Python's PIL Image module, so trained models are based on Python-preprocessed images. However, we found that the Python test carries a large performance overhead, which degrades inference performance. To get good performance when benchmarking quantized models, we decided to test in C++. C++ only supports libraries such as OpenCV, and Paddle discourages depending on external libraries, so we preprocess the images in Python, write them to a binary file, and read that file back in the C++ test. Users can modify the C++ test to read and preprocess the data directly if needed; accuracy will not drop much. We also provide the Python test `sample_tester.py` as a reference: compared with the C++ test `sample_tester.cc`, it makes the larger overhead of the Python test visible.

 ### 4.2 Deployment and inference
diff --git a/demo/mkldnn_quant/run_dummy.sh b/demo/mkldnn_quant/run_dummy.sh
new file mode 100644
index 00000000..488697d9
--- /dev/null
+++ b/demo/mkldnn_quant/run_dummy.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+MODEL_DIR=$1
+default_num_threads=1
+num_threads=${2:-$default_num_threads}
+default_batch_size=1
+batch_size=${3:-$default_batch_size}
+default_with_accuracy=false
+with_accuracy_layer=${4:-$default_with_accuracy}
+default_with_analysis=true
+with_analysis=${5:-$default_with_analysis}
+default_enable_mkldnn_bfloat16=false
+with_mkldnn_bfloat16=${6:-$default_enable_mkldnn_bfloat16}
+ITERATIONS=0
+
+GLOG_logtostderr=1 ./build/sample_tester_fake_data \
+    --infer_model=${MODEL_DIR} \
+    --batch_size=${batch_size} \
+    --num_threads=${num_threads} \
+    --iterations=${ITERATIONS} \
+    --with_accuracy_layer=${with_accuracy_layer} \
+    --use_analysis=${with_analysis} \
+    --enable_mkldnn_bfloat16=${with_mkldnn_bfloat16}
+
+# Example: KMP_BLOCKTIME=1 KMP_SETTINGS=1 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 numactl bash run_dummy.sh INT8 1 1 false false false
diff --git a/demo/mkldnn_quant/sample_tester_fake_data.cc b/demo/mkldnn_quant/sample_tester_fake_data.cc
new file mode 100644
index 00000000..fb77663d
--- /dev/null
+++ b/demo/mkldnn_quant/sample_tester_fake_data.cc
@@ -0,0 +1,121 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "paddle/include/paddle_inference_api.h"
+
+DEFINE_string(infer_model, "", "path to the model");
+DEFINE_string(infer_data, "", "path to the input data");
+DEFINE_int32(batch_size, 1, "inference batch size");
+DEFINE_int32(iterations,
+             0,
+             "number of batches to process; 0 means using the default of "
+             "10 / batch_size dummy batches");
+DEFINE_int32(num_threads, 1, "number of threads to run in parallel");
+DEFINE_bool(with_accuracy_layer,
+            true,
+            "Set with_accuracy_layer to true if the provided model has an "
+            "accuracy layer and requires a label input");
+DEFINE_bool(use_analysis,
+            false,
+            "If use_analysis is set to true, the model will be optimized");
+DEFINE_int32(warmup_iter, 2, "number of warmup batches");
+DEFINE_bool(enable_mkldnn_bfloat16,
+            false,
+            "If enable_mkldnn_bfloat16 is set to true, run bf16 inference");
+
+struct Timer {
+  std::chrono::high_resolution_clock::time_point start;
+  std::chrono::high_resolution_clock::time_point startu;
+
+  void tic() { start = std::chrono::high_resolution_clock::now(); }
+  double toc() {
+    startu = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double> time_span =
+        std::chrono::duration_cast<std::chrono::duration<double>>(startu -
+                                                                  start);
+    double used_time_ms = static_cast<double>(time_span.count()) * 1000.0;
+    return used_time_ms;
+  }
+};
+
+int main(int argc, char *argv[]) {
+  google::InitGoogleLogging(*argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  paddle::AnalysisConfig cfg;
+  cfg.SetModel(FLAGS_infer_model);
+  cfg.SetCpuMathLibraryNumThreads(FLAGS_num_threads);
+  if (FLAGS_use_analysis) {
+    cfg.DisableGpu();
+    cfg.SwitchIrOptim();
+    cfg.EnableMKLDNN();
+    if (FLAGS_enable_mkldnn_bfloat16) {
+      cfg.EnableMkldnnBfloat16();
+    }
+  }
+  cfg.SwitchUseFeedFetchOps(false);
+
+  auto predictor = paddle::CreatePaddlePredictor(cfg);
+  int batch_size = FLAGS_batch_size;
+  std::cout << "Batch size " << FLAGS_batch_size << std::endl;
+  int channels = 3;
+  int height = 224;
+  int width = 224;
+  int nums = batch_size * channels * height * width;
+  std::vector<int> input_shape = {batch_size, channels, height, width};
+  // All-zero fake input; no real dataset is needed for this benchmark.
+  std::vector<float> input(nums, 0.0f);
+  auto input_names = predictor->GetInputNames();
+
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape(input_shape);
+  input_t->copy_from_cpu(input.data());
+  if (FLAGS_with_accuracy_layer) {
+    // Dummy all-zero labels for models that carry an accuracy layer.
+    std::vector<int64_t> label(batch_size, 0);
+    auto input_l = predictor->GetInputTensor(input_names[1]);
+    input_l->Reshape({batch_size, 1});
+    input_l->copy_from_cpu(label.data());
+  }
+
+  for (auto iter = 0; iter < FLAGS_warmup_iter; iter++) {
+    predictor->ZeroCopyRun();
+    LOG(INFO) << "Warmup batch " << iter;
+  }
+  Timer run_timer;
+  double elapsed_time = 0;
+  run_timer.tic();
+  // With --iterations=0, run a small default number of dummy batches,
+  // but at least one so the latency division below stays well-defined.
+  FLAGS_iterations = (FLAGS_iterations == 0)
+                         ? std::max(1, 10 / FLAGS_batch_size)
+                         : FLAGS_iterations;
+  for (auto iter = 0; iter < FLAGS_iterations; iter++) {
+    predictor->ZeroCopyRun();
+  }
+
+  elapsed_time += run_timer.toc();
+  LOG(INFO) << "Executed " << FLAGS_iterations << " iterations";
+  auto batch_latency = elapsed_time / FLAGS_iterations;
+  auto sample_latency = batch_latency / FLAGS_batch_size;
+  // FPS: samples per second, i.e. 1000 ms divided by per-sample latency in ms.
+  std::cout << "Batch_latency(ms): " << batch_latency << std::endl;
+  std::cout << "Sample_latency(ms): " << sample_latency << std::endl;
+  std::cout << "FPS: " << 1000.f / sample_latency << std::endl;
+}
--
GitLab
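For context, a minimal sketch of how the demo added by this patch might be built and run; the Paddle inference library path and the INT8 model directory below are placeholders, not part of the patch:

```bash
# Build the demos (run from demo/mkldnn_quant/). PADDLE_LIB must point to a
# prebuilt Paddle inference library, as the CMakeLists above requires.
mkdir -p build && cd build
cmake .. -DPADDLE_LIB=/path/to/paddle_inference -DWITH_MKL=ON
make -j
cd ..

# Positional arguments: model dir, num_threads, batch_size,
# with_accuracy_layer, use_analysis, enable_mkldnn_bfloat16.
bash run_dummy.sh ./int8_model 1 1 false true false
```

Because the tester feeds all-zero inputs and labels, `with_accuracy_layer` normally stays at the script's default of `false`; the label input is only wired up when the model graph actually contains an accuracy op.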