From 076f83311094e073eb2a60b495fa48ae25793a5e Mon Sep 17 00:00:00 2001
From: Tao Luo
Date: Thu, 11 Jul 2019 16:27:34 +0800
Subject: [PATCH] add config.SetMkldnnCacheCapacity api for mkldnn cache clear
 strategy (#18580)

* add config.SetMkldnnCacheCapacity api for mkldnn cache clear strategy

test=develop

* enhance MkldnnPostReset

test=develop

* add comments for mkldnn_cache_capacity field

test=develop
---
 paddle/fluid/inference/analysis/argument.h     |  2 +
 paddle/fluid/inference/api/analysis_config.cc  | 11 ++++
 .../fluid/inference/api/analysis_predictor.cc  | 44 +++++++++++++-
 .../fluid/inference/api/analysis_predictor.h   |  5 ++
 .../inference/api/paddle_analysis_config.h     |  7 +++
 .../tests/api/analyzer_mm_dnn_tester.cc        | 58 +++++++++++++++----
 paddle/fluid/platform/device_context.cc        |  3 +-
 7 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index e468bc22678..3fcf579cebc 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -148,6 +148,8 @@ struct Argument {
   // Pass a set of op types to enable its mkldnn kernel
   DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
                       std::unordered_set<std::string>);
+  // The cache capacity of different input shapes for mkldnn.
+  DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);
 
 #ifdef PADDLE_WITH_MKLDNN
   // A set of op types to enable their quantized kernels
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 4d0bf77460d..0ea2600065a 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // MKLDNN related.
   CP_MEMBER(use_mkldnn_);
   CP_MEMBER(mkldnn_enabled_op_types_);
+  CP_MEMBER(mkldnn_cache_capacity_);
   // Quantization related.
   CP_MEMBER(use_mkldnn_quantizer_);
   CP_MEMBER(mkldnn_quantizer_config_);
@@ -162,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
   Update();
 }
 
+void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
+#ifdef PADDLE_WITH_MKLDNN
+  mkldnn_cache_capacity_ = capacity;
+#else
+  LOG(ERROR) << "Please compile with MKLDNN first to set the MKLDNN cache capacity";
+  mkldnn_cache_capacity_ = 0;
+#endif
+}
+
 void AnalysisConfig::EnableMkldnnQuantizer() {
 #ifdef PADDLE_WITH_MKLDNN
   if (!mkldnn_quantizer_config_)
@@ -343,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_ngraph_;
 
   ss << use_mkldnn_;
+  ss << mkldnn_cache_capacity_;
   for (auto &item : mkldnn_enabled_op_types_) ss << item;
   ss << ";";
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 019f9340c99..7650b2e90a0 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -185,10 +185,49 @@ bool AnalysisPredictor::PrepareExecutor() {
   return true;
 }
 
+void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
+#ifdef PADDLE_WITH_MKLDNN
+  VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
+          << platform::get_cur_mkldnn_session_id();
+  // In cache clearing mode.
+  if (config_.mkldnn_cache_capacity_ > 0) {
+    VLOG(2) << "In mkldnn cache clear mode.";
+    platform::set_cur_mkldnn_session_id(
+        platform::kMKLDNNSessionID_CacheClearing);
+    platform::set_cur_input_shape_cache_capacity(
+        config_.mkldnn_cache_capacity_);
+    // Set current_input_shape for caching dynamic shape.
+    std::stringstream ss;
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
+        ss << inputs[i].shape[j] << "-";
+      }
+    }
+    VLOG(2) << "Set input shape=" << ss.str();
+    platform::set_cur_input_shape_str(ss.str());
+  }
+#endif
+}
+
+void AnalysisPredictor::MkldnnPostReset() {
+#ifdef PADDLE_WITH_MKLDNN
+  // In cache clearing mode.
+  if (config_.mkldnn_cache_capacity_ > 0) {
+    paddle::platform::set_cur_mkldnn_session_id(
+        platform::kMKLDNNSessionID_Default);
+    platform::set_cur_input_shape_cache_capacity(0);
+    platform::set_cur_input_shape_str("");
+  }
+#endif
+}
+
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPreSet(inputs);
+#endif
   VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
@@ -230,7 +269,9 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
-
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
   return true;
 }
 
@@ -595,7 +636,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
-
   return true;
 }
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 551ca5ba0cd..7a366b10c7b 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -109,6 +109,11 @@ class AnalysisPredictor : public PaddlePredictor {
   template <typename T>
   void GetFetchOne(const framework::LoDTensor &fetchs,
                    PaddleTensor *output_data);
+  // PreSet and PostReset for Mkldnn multi-thread and dynamic shape input.
+  // Used in AnalysisPredictor::Run(); AnalysisPredictor::ZeroCopyRun() is not
+  // supported yet.
+  void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
+  void MkldnnPostReset();
 
 #if PADDLE_WITH_TENSORRT
   // When we use Paddle-TRT INT8 engine, we need to generate calibration table
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index e94ca5e9626..83143be07a7 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -184,6 +184,10 @@ struct AnalysisConfig {
   /** Turn on MKLDNN.
    */
   void EnableMKLDNN();
+  /** Set the cache capacity of different input shapes for MKLDNN.
+   *  Default 0 means don't cache any shape.
+   */
+  void SetMkldnnCacheCapacity(int capacity);
   /** A boolean state telling whether to use the MKLDNN.
    */
   bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -316,8 +320,11 @@ struct AnalysisConfig {
   std::vector<std::string> anakin_passes_filter_;
   std::vector<std::string> anakin_ops_filter_;
 
+  // mkldnn related.
+  int mkldnn_cache_capacity_{0};
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
+
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
   // Variables held by config can take up a lot of memory in some cases.
diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
index ce9ad6ff125..70478d692cc 100644
--- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
@@ -173,12 +173,47 @@ TEST(Analyzer_MM_DNN, compare_determine) {
 }
 
 #ifdef PADDLE_WITH_MKLDNN
-void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
+void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity,
+                          std::vector<std::vector<PaddleTensor>> *outputs) {
   AnalysisConfig config;
   SetConfig(&config);
   config.EnableMKLDNN();
-  // TODO(luotao): explicit following settings will be deprecated after enhance
-  // config.EnableMKLDNN() interface.
+  config.SetMkldnnCacheCapacity(mkldnn_input_shape_cache_capacity);
+
+  std::vector<PaddleTensor> input;
+  auto predictor = CreatePaddlePredictor(config);
+
+  int sample_num = 10;
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  outputs->resize(sample_num);
+
+  for (int i = 0; i < sample_num; i++) {
+    PrepareInputs(&input, &data, FLAGS_batch_size);
+    predictor->Run(input, &(*outputs)[i], 1);
+  }
+}
+
+TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
+  std::vector<std::vector<PaddleTensor>> outputs, cache_outputs;
+  // 0 means do not use cache clear strategy.
+  TestMkldnnCacheClear(0, &outputs);
+  // 4 means use cache clear strategy, and the
+  // mkldnn_input_shape_cache_capacity is 4.
+  TestMkldnnCacheClear(4, &cache_outputs);
+  // compare the result.
+  for (size_t i = 0; i < outputs.size(); i++) {
+    CompareResult(outputs[i], cache_outputs[i]);
+  }
+}
+
+void TestMkldnnShapeBlobSize(int mkldnn_input_shape_cache_capacity) {
+  AnalysisConfig config;
+  SetConfig(&config);
+  config.EnableMKLDNN();
+  config.SwitchUseFeedFetchOps(false);
+  // Since AnalysisPredictor::Run() will reset cur_mkldnn_session_id to default
+  // before it is finished, we use AnalysisPredictor::ZeroCopyRun() here to
+  // check the mkldnn_shape_blob_size.
   if (mkldnn_input_shape_cache_capacity > 0) {
     platform::set_cur_mkldnn_session_id(
         platform::kMKLDNNSessionID_CacheClearing);
     platform::set_cur_input_shape_cache_capacity(
         mkldnn_input_shape_cache_capacity);
   }
 
-  std::vector<PaddleTensor> input, output;
+  std::vector<PaddleTensor> input;
   auto predictor = CreatePaddlePredictor(config);
 
   int sample_num = 10;
@@ -195,8 +230,12 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
   auto &pool = platform::DeviceContextPool::Instance();
   auto *dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext *>(
       pool.Get(platform::CPUPlace()));
+  // clear before test
+  dev_ctx->ResetBlobMap();
+
   for (int i = 0; i < sample_num; i++) {
     PrepareInputs(&input, &data, FLAGS_batch_size);
+    ConvertPaddleTensorToZeroCopyTensor(predictor.get(), input);
     if (mkldnn_input_shape_cache_capacity > 0) {
       std::stringstream ss;
       for (size_t i = 0; i < input.size(); i++) {
         for (size_t j = 0; j < input[i].shape.size(); j++) {
           ss << input[i].shape[j] << "-";
         }
       }
-      // TODO(luotao): explicit following settings will be deprecated after
-      // enhance config.EnableMKLDNN() interface.
       platform::set_cur_input_shape_str(ss.str());
     }
-    predictor->Run(input, &output, 1);
+    predictor->ZeroCopyRun();
   }
 
   if (mkldnn_input_shape_cache_capacity > 0) {
     PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(),
                       static_cast<size_t>(mkldnn_input_shape_cache_capacity));
   } else {
     PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL);
   }
-  dev_ctx->ResetBlobMap();
 }
 
-TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
+TEST(Analyzer_MM_DNN, mkldnn_shape_blob_size) {
   // 0 means do not use cache clear strategy.
-  TestMkldnnCacheClear(0);
+  TestMkldnnShapeBlobSize(0);
   // 4 means use cache clear strategy, and the
   // mkldnn_input_shape_cache_capacity is 4.
-  TestMkldnnCacheClear(4);
+  TestMkldnnShapeBlobSize(4);
 }
 #endif
 
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 87b82ec5e39..c9ce7ed12e4 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -462,7 +462,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   if (key_it == sBlob->end()) {
     // In cache clearing mode, cur_input_shape_cache_capacity defines
     // max pblob capacity
-    if ((sid == kMKLDNNSessionID_CacheClearing) &&
+    if ((static_cast<size_t>(sid) == kMKLDNNSessionID_CacheClearing) &&
+        sBlob->size() &&
         (sBlob->size() >=
          static_cast<size_t>(cur_input_shape_cache_capacity))) {
       VLOG(2) << "sid=" << sid
--
GitLab
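
Usage note (not part of the patch): a minimal, hypothetical sketch of how the new SetMkldnnCacheCapacity() API could be driven from user code. The model directory, input tensor name, shape, and the capacity value 10 are placeholders, not values taken from this change; with a non-zero capacity, AnalysisPredictor::Run() enters the cache-clearing session via MkldnnPreSet() and restores the defaults in MkldnnPostReset().

// Hypothetical driver for the MKL-DNN cache clearing strategy added above.
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableMKLDNN();
  // Cache MKL-DNN primitives for at most 10 distinct input shapes; once the
  // limit is exceeded, the oldest shape-specific blobs are cleared.
  // A capacity of 0 (the default) disables the cache clearing strategy.
  config.SetMkldnnCacheCapacity(10);

  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);

  paddle::PaddleTensor input;
  input.name = "data";  // placeholder input name
  input.shape = {1, 3, 224, 224};
  input.dtype = paddle::PaddleDType::FLOAT32;
  input.data.Resize(1 * 3 * 224 * 224 * sizeof(float));
  // Fill input.data with real feature values before running.

  std::vector<paddle::PaddleTensor> outputs;
  // Run() invokes MkldnnPreSet()/MkldnnPostReset() internally when MKLDNN is on.
  predictor->Run({input}, &outputs, 1);
  return 0;
}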