diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e0dc0f72f17dc535fc7a1524fbe44cd93ae5906a..2f5f9ca9af3d623ebff7d15f6f9c5d1e8be84cb6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -343,8 +343,6 @@ void AnalysisPredictor::MkldnnPreSet(
     platform::MKLDNNDeviceContext::tls().set_cur_mkldnn_session_id(
         platform::MKLDNNDeviceContextThreadLocals::
             kMKLDNNSessionID_CacheClearing);
-    platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity(
-        config_.mkldnn_cache_capacity_);
     // Set current_input_shape for caching dynamic shape.
     std::stringstream ss;
     for (size_t i = 0; i < inputs_shape.size(); ++i) {
@@ -355,6 +353,9 @@
     VLOG(2) << "Set input shape=" << ss.str();
     platform::MKLDNNDeviceContext::tls().set_cur_input_shape_str(ss.str());
   }
+  platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity(
+      config_.mkldnn_cache_capacity_);
+
 #endif
 }
 
@@ -370,10 +371,9 @@ void AnalysisPredictor::MkldnnPostReset() {
       CHECK_LE(shape_blob_size,
                static_cast<size_t>(config_.mkldnn_cache_capacity_));
     }
-    paddle::platform::MKLDNNDeviceContext::tls().set_cur_mkldnn_session_id(
-        platform::MKLDNNDeviceContextThreadLocals::kMKLDNNSessionID_Default);
-    platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity(0);
-    platform::MKLDNNDeviceContext::tls().set_cur_input_shape_str("");
+    // We cannot reset to the default cache settings here,
+    // as a CopyToCpu call may follow and oneDNN primitives
+    // are used there, so the cache would grow.
   }
 #endif
 }
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
index f157f6b0b82ea9a4759d68d522acd614a98a5f6c..384bef8a4b439d8543127d5e7a1110525f06d282 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
@@ -120,6 +120,19 @@ void validate_cache_onednn(int cache_capacity = 1) {
   file.close();
   infer_file.close();
 
+  // Fetch the first output tensor from the model, as reorders
+  // may be called internally and that would impact
+  // the cache size
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputTensor(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  size_t out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                   std::multiplies<int>());
+  std::vector<float> out_data;
+  out_data.resize(out_num);
+  output_t->CopyToCpu(out_data.data());
+
+  // Release predictor (relevant cache should be emptied)
   predictor.reset(nullptr);
   cache_filling.push_back(GetNumCachedObjects());
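
For context, the cache-clearing path touched by this change is typically driven from the user side roughly as shown below. This is a minimal, untested sketch against the Paddle Inference 2.x C++ API; the model path, input name handling, and input shape are placeholders, and the exact include path depends on how the inference library is installed.

#include <functional>
#include <numeric>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");     // placeholder model directory
  config.EnableMKLDNN();
  config.SetMkldnnCacheCapacity(10);  // capacity > 0 enables cache clearing mode

  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed one input; each distinct input shape produces its own oneDNN cache entry.
  auto input_names = predictor->GetInputNames();
  auto input = predictor->GetInputHandle(input_names[0]);
  std::vector<int> shape = {1, 3, 224, 224};  // placeholder shape
  input->Reshape(shape);
  std::vector<float> in_data(1 * 3 * 224 * 224, 0.f);
  input->CopyFromCpu(in_data.data());

  predictor->Run();

  // CopyToCpu may itself trigger oneDNN reorders, which is why MkldnnPostReset
  // no longer resets the thread-local cache settings in this change.
  auto output_names = predictor->GetOutputNames();
  auto output = predictor->GetOutputHandle(output_names[0]);
  auto out_shape = output->shape();
  size_t out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                   std::multiplies<int>());
  std::vector<float> out_data(out_num);
  output->CopyToCpu(out_data.data());
  return 0;
}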