diff --git a/paddle/fluid/inference/analysis/analysis_pass.h b/paddle/fluid/inference/analysis/analysis_pass.h
index 299f235a74ae0ffb663be61079607d8ac1105a97..d5a972fab3beae4d4e2e512d1ccda3f0b8356682 100644
--- a/paddle/fluid/inference/analysis/analysis_pass.h
+++ b/paddle/fluid/inference/analysis/analysis_pass.h
@@ -46,8 +46,6 @@ class AnalysisPass {
  protected:
   // User should implement these.
   virtual void RunImpl(Argument* argument) = 0;
-
-  Argument* argument_{nullptr};
 };
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 1862f61f0f4b94c9fa9636e876e943113d9aebd4..391330a7c0f2dda731fe8455fdab81b276e3f272 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -190,9 +190,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";
 
-  // Fix TensorArray reuse not cleaned bug.
-  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
-  tensor_array_batch_cleaner_.ResetTensorArray();
+  // All the containers in the scope are held across batches in inference, but
+  // the operators assume that each container is reset after every batch.
+  // Here is a bugfix: collect all the container variables and clear them; the
+  // next time, the operator will call MutableData and construct a new
+  // container, so the container starts empty for each batch.
+  tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_);
+  tensor_array_batch_cleaner_.ResetNoTensorVars();
   return true;
 }
 
@@ -417,7 +421,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 bool AnalysisPredictor::ZeroCopyRun() {
   executor_->Run();
   // Fix TensorArray reuse not cleaned bug.
-  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
+  tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
   tensor_array_batch_cleaner_.ResetTensorArray();
   return true;
 }
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 74369e886692fef3172d24c637b03a5bcf81a6c2..4c5b412a2c1717b8edbb17c238caaa11aeccebd3 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -154,9 +154,9 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";
 
-  // Fix TensorArray reuse not cleaned bug.
-  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
-  tensor_array_batch_cleaner_.ResetTensorArray();
+  // Reset other vector-like containers that are not cleaned after each batch.
+  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
+  tensor_array_batch_cleaner_.ResetNoTensorVars();
   return true;
 }
 
diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.cc b/paddle/fluid/inference/api/details/reset_tensor_array.cc
index 4ae6c6dc9f44650c1c62f5be5448864d817513b1..569a487328e2f1febe2ca5014b232dbd51d28079 100644
--- a/paddle/fluid/inference/api/details/reset_tensor_array.cc
+++ b/paddle/fluid/inference/api/details/reset_tensor_array.cc
@@ -46,5 +46,28 @@ void TensorArrayBatchCleaner::ResetTensorArray() {
   }
 }
 
+void TensorArrayBatchCleaner::CollectNoTensorVars(framework::Scope *scope) {
+  if (no_tensor_flag_) {
+    for (auto &var_name : scope->LocalVarNames()) {
+      auto *var = scope->FindVar(var_name);
+      if (!var->IsInitialized()) continue;
+      if (!valid_types_.count(var->Type())) {
+        no_tensor_vars_.insert(var);
+      }
+    }
+
+    for (auto *kid : scope->kids()) {
+      CollectNoTensorVars(kid);  // Recurse into child scopes.
+    }
+    no_tensor_flag_ = false;  // Only collect once.
+  }
+}
+
+void TensorArrayBatchCleaner::ResetNoTensorVars() {
+  for (auto *var : no_tensor_vars_) {
+    var->Clear();
+  }
+}
+
 }  // namespace details
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.h b/paddle/fluid/inference/api/details/reset_tensor_array.h
index a39449ff0e67786815dfb8d2d30d79dcdba757d7..6a5ea64de66fcac44117d0d8f7798e8875703ec6 100644
--- a/paddle/fluid/inference/api/details/reset_tensor_array.h
+++ b/paddle/fluid/inference/api/details/reset_tensor_array.h
@@ -14,9 +14,11 @@
 
 #pragma once
 
+#include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/variable.h"
 
 namespace paddle {
 namespace details {
@@ -24,13 +26,28 @@ namespace details {
 // Clean the TensorArray each batch to make the behavior the same with the
 // training phase.
 struct TensorArrayBatchCleaner {
+  TensorArrayBatchCleaner() {
+    valid_types_.insert(typeid(framework::Tensor));
+    valid_types_.insert(typeid(framework::LoDTensor));
+  }
+  // Collect the variables that are not Tensor or LoDTensor so they can be
+  // cleared each batch: some of them are containers, and some operators keep
+  // inserting new items without clearing the containers first, so the memory
+  // grows larger and larger in inference services deployed online.
+  void CollectNoTensorVars(framework::Scope *scope);
+  void ResetNoTensorVars();
+
   // Fix the tensor array not clear in the inference scenarios.
   void CollectTensorArrays(framework::Scope *scope);
   void ResetTensorArray();
 
  private:
   bool flag_{true};
+  bool no_tensor_flag_{true};
   std::vector<framework::LoDTensorArray *> arrays_;
+
+  std::unordered_set<std::type_index> valid_types_;
+  std::unordered_set<framework::Variable *> no_tensor_vars_;
 };
 
 }  // namespace details
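The collect-and-reset pattern this patch introduces can be illustrated outside of Paddle. Below is a minimal standalone C++17 sketch written for illustration only: the Variable, Scope, GetMutable, Clear, and whitelist definitions here are simplified stand-ins I am assuming, not Paddle's real classes; only the function names and the overall flow mirror the patch.

// reset_sketch.cc -- minimal standalone analogue of TensorArrayBatchCleaner.
// Variable and Scope are simplified stand-ins, not Paddle's real classes.
#include <iostream>
#include <memory>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// A variable holds a value of arbitrary type, like framework::Variable.
struct Variable {
  std::shared_ptr<void> holder;
  std::type_index type{typeid(void)};

  // Lazily (re)constructs the payload, mirroring the MutableData semantics
  // the patch relies on: after Clear(), the next access builds a fresh,
  // empty container.
  template <typename T>
  T *GetMutable() {
    if (type != std::type_index(typeid(T))) {
      holder = std::make_shared<T>();
      type = typeid(T);
    }
    return static_cast<T *>(holder.get());
  }
  bool IsInitialized() const { return holder != nullptr; }
  void Clear() { holder.reset(); type = typeid(void); }
};

// A scope owns named variables and child scopes, like framework::Scope.
struct Scope {
  std::unordered_map<std::string, Variable> vars;
  std::vector<Scope *> kids;
  Variable *FindVar(const std::string &name) { return &vars[name]; }
};

// Collect every initialized variable whose type is NOT whitelisted,
// recursing into child scopes -- the core of CollectNoTensorVars.
void CollectNoTensorVars(Scope *scope,
                         const std::unordered_set<std::type_index> &valid,
                         std::unordered_set<Variable *> *out) {
  for (auto &kv : scope->vars) {
    Variable &var = kv.second;
    if (!var.IsInitialized()) continue;
    if (!valid.count(var.type)) out->insert(&var);
  }
  for (Scope *kid : scope->kids) CollectNoTensorVars(kid, valid, out);
}

int main() {
  Scope scope;
  // Pretend an operator appended per-batch state into a container variable.
  scope.FindVar("states")->GetMutable<std::vector<int>>()->push_back(1);

  // This whitelist plays the role of valid_types_ (Tensor/LoDTensor above).
  std::unordered_set<std::type_index> valid{typeid(float)};
  std::unordered_set<Variable *> to_reset;
  CollectNoTensorVars(&scope, valid, &to_reset);

  for (Variable *var : to_reset) var->Clear();  // ResetNoTensorVars analogue.

  // Next batch: GetMutable rebuilds an empty container, so state cannot grow.
  std::cout << scope.FindVar("states")->GetMutable<std::vector<int>>()->size()
            << "\n";  // prints 0
}

The design point worth noting is that the reset step does not rebuild the containers itself; it only drops them, and the first MutableData-style access of the next batch reconstructs an empty one, so no operator code has to change.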