diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 07208d016a79083079707e38dd0207b4d1c282a2..f0eb0d1fa675b7e88aae44acd79e425a2bc70e47 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -325,11 +325,10 @@ inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_te
 # densebox
 set(DENSEBOX_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/densebox")
 download_data_without_verify(${DENSEBOX_INSTALL_DIR} "densebox.tar.gz")
-#inference_analysis_test(test_analyzer_detect SRCS analyzer_detect_tester.cc
-#        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-#        ARGS --infer_model=${DENSEBOX_INSTALL_DIR}/model --infer_data=${DENSEBOX_INSTALL_DIR}/detect_input_50.txt
-#        --infer_shape=${DENSEBOX_INSTALL_DIR}/shape_50.txt)
-#set_property(TEST test_analyzer_detect PROPERTY ENVIRONMENT GLOG_vmodule=analysis_predictor=2)
+inference_analysis_test(test_analyzer_detect_functional_mkldnn SRCS analyzer_detect_functional_mkldnn_tester.cc
+        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+        ARGS --infer_model=${DENSEBOX_INSTALL_DIR}/model --infer_data=${DENSEBOX_INSTALL_DIR}/detect_input_50.txt
+        --infer_shape=${DENSEBOX_INSTALL_DIR}/shape_50.txt)
 
 # mobilenet with transpose op
 set(MOBILENET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet")
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f157f6b0b82ea9a4759d68d522acd614a98a5f6c
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
@@ -0,0 +1,153 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <gtest/gtest.h>
+#include <fstream>
+#include <iostream>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/place.h"
+
+DEFINE_string(infer_shape, "", "data shape file");
+DEFINE_int32(sample, 20, "number of samples");
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+struct Record {
+  std::vector<float> data;
+  std::vector<int32_t> shape;
+};
+
+Record ProcessALine(const std::string &line, const std::string &shape_line) {
+  VLOG(3) << "process a line";
+
+  Record record;
+  std::vector<std::string> data_strs;
+  split(line, ' ', &data_strs);
+  for (auto &d : data_strs) {
+    record.data.push_back(std::stof(d));
+  }
+
+  std::vector<std::string> shape_strs;
+  split(shape_line, ' ', &shape_strs);
+  for (auto &s : shape_strs) {
+    record.shape.push_back(std::stoi(s));
+  }
+  return record;
+}
+
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  // cfg->SwitchIrDebug();  // Enable to have graphs dumped
+  cfg->SwitchSpecifyInputNames(false);
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
+              const std::string &line, const std::string &shape_line) {
+  auto record = ProcessALine(line, shape_line);
+
+  PaddleTensor input;
+  input.shape = record.shape;
+  input.dtype = PaddleDType::FLOAT32;
+  size_t input_size = record.data.size() * sizeof(float);
+  input.data.Resize(input_size);
+  memcpy(input.data.data(), record.data.data(), input_size);
+  std::vector<PaddleTensor> input_slots;
+  input_slots.assign({input});
+  (*inputs).emplace_back(input_slots);
+}
+
+#ifdef PADDLE_WITH_MKLDNN
+int GetNumCachedObjects(void) {
+  auto &pool = platform::DeviceContextPool::Instance();
+  platform::CPUPlace place;
+  auto onednn_dev_ctx =
+      dynamic_cast<platform::MKLDNNDeviceContext *>(pool.Get(place));
+  return onednn_dev_ctx->GetCachedObjectsNumber();
+}
+
+void validate_cache_onednn(int cache_capacity = 1) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  cfg.EnableMKLDNN();
+  cfg.SetMkldnnCacheCapacity(cache_capacity);
+
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  std::vector<std::vector<PaddleTensor>> ref_outputs;
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+
+  std::ifstream file(FLAGS_infer_data);
+  std::ifstream infer_file(FLAGS_infer_shape);
+  std::vector<std::string> lines;
+  std::vector<std::string> shape_lines;
+
+  // Let's work with 4 samples
+  auto num_samples = 4;
+  ref_outputs.resize(num_samples);
+  lines.resize(num_samples);
+  shape_lines.resize(num_samples);
+
+  // Let's remember the number of cached objects before
+  // execution and after every single execution
+  std::vector<int> cache_filling;
+  cache_filling.push_back(GetNumCachedObjects());
+
+  // run predictions sequentially
+  for (int i = 0; i < num_samples; ++i) {
+    std::getline(file, lines[i]);
+    std::getline(infer_file, shape_lines[i]);
+    SetInput(&input_slots_all, lines[i], shape_lines[i]);
+    predictor->Run(input_slots_all[i], &ref_outputs[i], FLAGS_batch_size);
+    // record number of cached objects
+    cache_filling.push_back(GetNumCachedObjects());
+  }
+
+  file.close();
+  infer_file.close();
+
+  predictor.reset(nullptr);
+  cache_filling.push_back(GetNumCachedObjects());
+
+  // Compare results
+  // First and last value should be equal, i.e. the cache is empty
+  // before using it and after releasing the executor
+  PADDLE_ENFORCE_EQ(
+      cache_filling[0], cache_filling[cache_filling.size() - 1],
+      platform::errors::Fatal("Cache size before execution and after "
+                              "releasing Executor do not match"));
+
+  // Iterate to check that the cache does not keep growing
+  // once cache capacity is exceeded
+  if (cache_capacity != 0) {
+    for (int i = cache_capacity + 1; i < num_samples + 1; ++i) {
+      PADDLE_ENFORCE_EQ(
+          cache_filling[cache_capacity], cache_filling[i],
+          platform::errors::Fatal("Cache capacity should not increase "
+                                  "after full capacity is used"));
+    }
+  }
+}
+
+TEST(Analyzer_detect, validate_cache_onednn) {
+  validate_cache_onednn(2 /*cache_capacity*/);
+}
+#endif
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
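The tester above asserts two invariants: the number of cached oneDNN objects must return to its initial value once the predictor is released, and it must stop growing after `cache_capacity` distinct input shapes have been seen. The standalone sketch below models that contract with a toy shape-keyed cache; `ShapeCache` and everything inside it are invented for illustration and are not Paddle APIs.

```cpp
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the oneDNN blob cache: entries are keyed by
// input-shape string, and at most `capacity` shapes are kept (one evicted
// when a new shape arrives at full capacity, as SetBlob does).
class ShapeCache {
 public:
  explicit ShapeCache(size_t capacity) : capacity_(capacity) {}

  void Run(const std::string& shape) {
    if (cache_.count(shape) == 0) {
      if (cache_.size() == capacity_) {
        cache_.erase(cache_.begin());  // evict one shape's entries
      }
      cache_[shape] = 1;  // pretend one cached object per shape
    }
  }

  size_t CachedObjects() const { return cache_.size(); }
  void ReleaseExecutor() { cache_.clear(); }  // like ResetBlobMap(ptr)

 private:
  size_t capacity_;
  std::map<std::string, int> cache_;
};

int main() {
  const size_t cache_capacity = 2;
  ShapeCache cache(cache_capacity);

  std::vector<size_t> cache_filling{cache.CachedObjects()};
  for (const auto& shape :
       {"1x3x224x224", "1x3x320x320", "1x3x640x640", "1x3x800x800"}) {
    cache.Run(shape);
    cache_filling.push_back(cache.CachedObjects());
  }
  cache.ReleaseExecutor();
  cache_filling.push_back(cache.CachedObjects());

  // Same two invariants the tester enforces:
  // 1) empty before the first run and after the executor is released
  assert(cache_filling.front() == cache_filling.back());
  // 2) no growth once `cache_capacity` distinct shapes have been seen
  //    (skip the final post-release entry)
  for (size_t i = cache_capacity + 1; i + 1 < cache_filling.size(); ++i) {
    assert(cache_filling[cache_capacity] == cache_filling[i]);
  }
  return 0;
}
```

With four distinct shapes and capacity 2, `cache_filling` evolves as 0, 1, 2, 2, 2 and returns to 0 after release, which is exactly the sequence the `PADDLE_ENFORCE_EQ` checks expect.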
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 7e983eb54ae2cdb44cf4ae5a949f0fac40ec4835..1179677fd6b9f57152cf7821f6fd088b8945c129 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -563,7 +563,7 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
 
 MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
     : CPUDeviceContext(place), p_blobmap_() {
   p_blobmap_.reset(new BlobMap());
-  p_exec_items_.reset(new ExecMap());
+  p_exec_items_.reset(new ExecShape());
   p_mutex_.reset(new std::mutex());
 }
@@ -644,10 +644,15 @@ void MKLDNNDeviceContext::ResetBlobMap(void* ptr) {
     if (ptr == nullptr) {
       p_blobmap_->clear();
     } else {
-      for (auto& v : (*p_exec_items_)[ptr]) {
-        (v.first)->erase(v.second);
+      // For each recorded shape, release all cache entries
+      // created by the given executor, then drop that
+      // executor's entry from the shape's map
+      for (auto& s : *p_exec_items_) {
+        for (auto& v : (*s.second)[ptr]) {
+          (v.first)->erase(v.second);
+        }
+        s.second->erase(ptr);
       }
-      p_exec_items_->erase(ptr);
     }
   } else {
     VLOG(3) << "Prevented Clearing DNNL cache.";
@@ -655,11 +660,24 @@ void MKLDNNDeviceContext::ResetBlobMap(void* ptr) {
   }
 }
 
+void MKLDNNDeviceContext::RemoveShapeEntriesWithExecutor(void) const {
+  p_exec_items_->erase(p_exec_items_->begin());
+}
+
 void MKLDNNDeviceContext::LinkEntryWithExecutor(BlobPtr_t<KeyBlob> pblob,
                                                 KeyBlob::iterator it) const {
+  // Take current input shape from TLS
   // Take current executor address from TLS
   // and for this executor's items add the one defined with arguments
-  (*p_exec_items_)[tls().get_curr_exec()].push_back(std::make_pair(pblob, it));
+  auto key_it = p_exec_items_
+                    ->insert(std::make_pair(tls().cur_input_shape_str,
+                                            std::make_shared<ExecMap>()))
+                    .first;
+  (*key_it->second)[tls().get_curr_exec()].push_back(std::make_pair(pblob, it));
+
+  VLOG(3) << "LinkEntryWithExecutor, shapes: " << p_exec_items_->size()
+          << " curr exec size: "
+          << (*key_it->second)[tls().get_curr_exec()].size() << "\n";
 }
 
 void MKLDNNDeviceContext::BlockNextCacheClearing() {
@@ -716,6 +734,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
       VLOG(2) << "sid=" << sid
               << ", remove all blobs of shape: " << sBlob->begin()->first;
       sBlob->erase(sBlob->begin()->first);
+      RemoveShapeEntriesWithExecutor();
     }
     pBlob = std::make_shared<KeyBlob>();
     (*sBlob)[tls().cur_input_shape_str] = pBlob;
@@ -739,7 +758,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   return;
 }
 
-unsigned int MKLDNNDeviceContext::GetCachedObjectsNumber(void) {
+unsigned int MKLDNNDeviceContext::GetCachedObjectsNumber(void) const {
   unsigned int num_entries = 0;
   for (auto const& l3 : *p_blobmap_) {
     for (auto const& l2 : *(l3.second)) {
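The behavioral change in device_context.cc is the move from a flat executor-to-entries map to a two-level shape-to-executor-to-entries structure: `ResetBlobMap(ptr)` can now release one executor's entries under every shape, and `SetBlob`'s shape eviction can drop the matching bookkeeping via `RemoveShapeEntriesWithExecutor`. Below is a minimal sketch of that two-level erase logic; all names (`Link`, `ClearExecutor`, `Entry`) are invented for illustration, not Paddle code.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Toy model of the new bookkeeping: shape string -> executor pointer ->
// list of cached-entry handles.
using Entry = int;  // stands in for a (blob-map, iterator) pair
using ExecMap = std::unordered_map<void*, std::vector<Entry>>;
using ExecShape = std::unordered_map<std::string, std::shared_ptr<ExecMap>>;

// Mirrors LinkEntryWithExecutor: record an entry under (current shape,
// current executor), creating the per-shape map on first use.
void Link(ExecShape& items, const std::string& shape, void* exec, Entry e) {
  auto it = items.insert({shape, std::make_shared<ExecMap>()}).first;
  (*it->second)[exec].push_back(e);
}

// Mirrors the new ResetBlobMap(ptr) branch: walk every shape and drop
// everything recorded for one executor, leaving other executors intact.
void ClearExecutor(ExecShape& items, void* exec) {
  for (auto& s : items) {
    s.second->erase(exec);
  }
}

int main() {
  ExecShape items;
  int exec_a = 0, exec_b = 0;  // dummy executors; addresses serve as keys

  Link(items, "1x3x224x224", &exec_a, 1);
  Link(items, "1x3x320x320", &exec_a, 2);
  Link(items, "1x3x320x320", &exec_b, 3);

  ClearExecutor(items, &exec_a);
  std::cout << items["1x3x320x320"]->count(&exec_b) << "\n";  // prints 1
  std::cout << items["1x3x320x320"]->count(&exec_a) << "\n";  // prints 0
  return 0;
}
```

Note the same design choice as the patch: `insert` is used rather than `operator[]`, so an existing per-shape map is reused and the freshly constructed `shared_ptr<ExecMap>` is simply discarded when the shape is already present.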
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 8d9d1fd96f463c8e05e9c7e6ba7ed42672459bec..e2dbc90b5d1444b7f27ac00439a769ee3165a911 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -749,8 +749,14 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   using ShapeBlob = umap_key_string_t<KeyBlob>;
   using BlobMap = umap_value_smart_t<int, ShapeBlob>;
 
-  using ExecMap = std::unordered_map<
-      void*, std::vector<std::pair<BlobPtr_t<KeyBlob>, KeyBlob::iterator>>>;
+  // Auxiliary two-level structure (shape, executor) to make it easier
+  // to clear cache objects related to a specific executor
+
+  using ExecKey = void*;
+  using ExecMapCacheIterPair = std::pair<BlobPtr_t<KeyBlob>, KeyBlob::iterator>;
+  using ExecMap =
+      std::unordered_map<ExecKey, std::vector<ExecMapCacheIterPair>>;
+  using ExecShape = std::unordered_map<std::string, std::shared_ptr<ExecMap>>;
 
   explicit MKLDNNDeviceContext(CPUPlace place);
 
@@ -759,6 +765,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
 
   // Register object to currently used executor's map
   void LinkEntryWithExecutor(BlobPtr_t<KeyBlob>, KeyBlob::iterator) const;
+  void RemoveShapeEntriesWithExecutor(void) const;
 
   // Remove all entries from the blob map
   void ResetBlobMap(void* ptr);
@@ -773,7 +780,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   void SetBlob(const std::string& name, std::shared_ptr<void> data) const;
 
   // Calculate number of oneDNN objects cached
-  unsigned int GetCachedObjectsNumber(void);
+  unsigned int GetCachedObjectsNumber(void) const;
 
   // Find a saved blob. Return nullptr if not found
   std::shared_ptr<void> GetBlob(const std::string& name) const;
@@ -786,7 +793,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   std::shared_ptr<BlobMap> p_blobmap_;
   // Map key is pointer of executor and value is a data(iterator in map) needed
   // to erase
-  std::shared_ptr<ExecMap> p_exec_items_;
+  std::shared_ptr<ExecShape> p_exec_items_;
   std::shared_ptr<std::mutex> p_mutex_;
   bool block_next_cache_clearing_ = false;
 };
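For reference, `GetCachedObjectsNumber()` (now `const`, so the tester can call it through the device context) walks the three-level blob map: session id -> shape string -> named blobs. The self-contained model below reproduces that walk; the aliases here are simplified stand-ins that only approximate the real `umap_*` templates in the header.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Simplified stand-ins for the header's map templates:
// BlobMap: session id -> ShapeBlob, ShapeBlob: shape -> KeyBlob,
// KeyBlob: object name -> cached object.
template <typename T>
using umap_key_string_t = std::unordered_map<std::string, std::shared_ptr<T>>;

using KeyBlob = umap_key_string_t<void>;       // name -> blob
using ShapeBlob = umap_key_string_t<KeyBlob>;  // shape -> KeyBlob
using BlobMap = std::unordered_map<int, std::shared_ptr<ShapeBlob>>;

// Same walk as GetCachedObjectsNumber(): sum the sizes of every
// innermost KeyBlob across all sessions and shapes.
unsigned int CountCachedObjects(const BlobMap& blob_map) {
  unsigned int num_entries = 0;
  for (auto const& l3 : blob_map) {
    for (auto const& l2 : *(l3.second)) {
      num_entries += l2.second->size();
    }
  }
  return num_entries;
}

int main() {
  BlobMap blob_map;
  auto shapes = std::make_shared<ShapeBlob>();
  auto key_blob = std::make_shared<KeyBlob>();
  (*key_blob)["conv_p"] = nullptr;   // placeholder cached primitive
  (*key_blob)["conv_pd"] = nullptr;  // placeholder primitive descriptor
  (*shapes)["1x3x224x224"] = key_blob;
  blob_map[0] = shapes;

  std::cout << CountCachedObjects(blob_map) << "\n";  // prints 2
  return 0;
}
```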