未验证 提交 df7fabee 编写于 作者: W Wilber 提交者: GitHub

Fix memory leak for mkldnn. (#27493)

上级 b7319ef5
......@@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() {
// MKLDNN pre-set entry point that takes whole input tensors.
//
// Collects the `shape` member of every tensor in `inputs` and forwards the
// resulting list to the shape-based MkldnnPreSet overload. Compiles to a
// no-op when PADDLE_WITH_MKLDNN is not defined.
//
// \param[in] inputs input tensors; only their `shape` members are read.
void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
  // No session-id log here: the shape-based overload called below already
  // logs get_cur_mkldnn_session_id(), and the unterminated VLOG statement
  // that used to sit here made the function ill-formed.
  std::vector<std::vector<int>> inputs_shape;
  inputs_shape.reserve(inputs.size());
  for (const auto &input : inputs) {
    inputs_shape.emplace_back(input.shape);
  }
  MkldnnPreSet(inputs_shape);
#endif
}
void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
<< platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
......@@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
for (size_t i = 0; i < inputs_shape.size(); ++i) {
for (size_t j = 0; j < inputs_shape[i].size(); ++j) {
ss << inputs_shape[i][j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
......@@ -742,6 +753,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) {
std::vector<std::vector<int>> shape_vector;
auto names = GetInputNames();
for (size_t i = 0; i < names.size(); ++i) {
auto in_tensor = GetInputTensor(names[i]);
shape_vector.emplace_back(in_tensor->shape());
}
MkldnnPreSet(shape_vector);
}
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
......@@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
......
......@@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Shape-based overload. Called from AnalysisPredictor::ZeroCopyRun() (when
/// MKLDNN is enabled) with the shapes of the input tensors, and from
/// MkldnnPreSet(const std::vector<PaddleTensor> &), which forwards the shape
/// of each input tensor.
///
/// \param[in] inputs_shape shape of each input tensor, one entry per input
///
void MkldnnPreSet(const std::vector<std::vector<int>> &inputs_shape);
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册