未验证 提交 8226d44b 编写于 作者: Y Yan Chunwei 提交者: GitHub

Fix bug where container variables were not cleared between inference batches (#14674)

上级 c7153f88
...@@ -46,8 +46,6 @@ class AnalysisPass { ...@@ -46,8 +46,6 @@ class AnalysisPass {
protected: protected:
// User should implement these. // User should implement these.
virtual void RunImpl(Argument* argument) = 0; virtual void RunImpl(Argument* argument) = 0;
Argument* argument_{nullptr};
}; };
} // namespace analysis } // namespace analysis
......
...@@ -190,9 +190,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -190,9 +190,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
} }
VLOG(3) << "predict cost: " << timer.toc() << "ms"; VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // All the containers in the scope will be hold in inference, but the
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); // operators assume that the container will be reset after each batch.
tensor_array_batch_cleaner_.ResetTensorArray(); // Here is a bugfix, collect all the container variables, and reset then to a
// bool; the next time, the operator will call MutableData and construct a new
// container again, so that the container will be empty for each batch.
tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_);
tensor_array_batch_cleaner_.ResetNoTensorVars();
return true; return true;
} }
...@@ -417,7 +421,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor( ...@@ -417,7 +421,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool AnalysisPredictor::ZeroCopyRun() { bool AnalysisPredictor::ZeroCopyRun() {
executor_->Run(); executor_->Run();
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
tensor_array_batch_cleaner_.ResetTensorArray(); tensor_array_batch_cleaner_.ResetTensorArray();
return true; return true;
} }
......
...@@ -154,9 +154,9 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -154,9 +154,9 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
} }
VLOG(3) << "predict cost: " << timer.toc() << "ms"; VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // For some other vector like containers not cleaned after each batch.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
tensor_array_batch_cleaner_.ResetTensorArray(); tensor_array_batch_cleaner_.ResetNoTensorVars();
return true; return true;
} }
......
...@@ -46,5 +46,28 @@ void TensorArrayBatchCleaner::ResetTensorArray() { ...@@ -46,5 +46,28 @@ void TensorArrayBatchCleaner::ResetTensorArray() {
} }
} }
// Walk `scope` (and, recursively, its child scopes) and remember every
// initialized variable whose type is not one of the registered tensor
// types in valid_types_.  These are the container-like variables that some
// operators keep appending to across batches; they are stored in
// no_tensor_vars_ so that ResetNoTensorVars() can clear them after each
// run.  Collection is performed only once (guarded by no_tensor_flag_),
// on the assumption that the variable set of the scope does not change
// between batches — NOTE(review): confirm no operator creates new
// non-tensor variables after the first batch.
void TensorArrayBatchCleaner::CollectNoTensorVars(framework::Scope *scope) {
  if (no_tensor_flag_) {
    for (auto &var_name : scope->LocalVarNames()) {
      auto *var = scope->FindVar(var_name);
      // Skip variables that have never been written to; their type is not
      // meaningful yet.
      if (!var->IsInitialized()) continue;
      if (!valid_types_.count(var->Type())) {
        no_tensor_vars_.insert(var);
      }
    }
    // BUGFIX: recurse with CollectNoTensorVars, not CollectTensorArrays.
    // The original call gathered tensor arrays from child scopes (a no-op
    // whenever flag_ had already been consumed) and silently skipped the
    // container variables of every child scope.
    for (auto *kid : scope->kids()) {
      CollectNoTensorVars(kid);
    }
    no_tensor_flag_ = false;  // Only collect one time.
  }
}
// Reset every variable previously gathered by CollectNoTensorVars() back to
// an empty, uninitialized state.  The next operator that touches one of
// these variables will reconstruct its container from scratch, so each
// batch starts with empty containers instead of accumulating entries.
void TensorArrayBatchCleaner::ResetNoTensorVars() {
  for (auto it = no_tensor_vars_.begin(); it != no_tensor_vars_.end(); ++it) {
    (*it)->Clear();
  }
}
} // namespace details } // namespace details
} // namespace paddle } // namespace paddle
...@@ -14,9 +14,11 @@ ...@@ -14,9 +14,11 @@
#pragma once #pragma once
#include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle { namespace paddle {
namespace details { namespace details {
...@@ -24,13 +26,28 @@ namespace details { ...@@ -24,13 +26,28 @@ namespace details {
// Clean the TensorArray each batch to make the behavior the same with the // Clean the TensorArray each batch to make the behavior the same with the
// training phase. // training phase.
struct TensorArrayBatchCleaner { struct TensorArrayBatchCleaner {
// Register the only variable types treated as "tensor-like" and therefore
// exempt from per-batch resetting; any initialized variable of a different
// type is assumed to be a container that must be cleared between batches.
TensorArrayBatchCleaner() {
  valid_types_.insert(typeid(framework::Tensor));
  valid_types_.insert(typeid(framework::LoDTensor));
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
// keep inserting new items without clearing the containers first; So the
// memory grow larger and larger in inference service deployed online.
void CollectNoTensorVars(framework::Scope *scope);
void ResetNoTensorVars();
// Fix the tensor array not clear in the inference scenarios. // Fix the tensor array not clear in the inference scenarios.
void CollectTensorArrays(framework::Scope *scope); void CollectTensorArrays(framework::Scope *scope);
void ResetTensorArray(); void ResetTensorArray();
private: private:
bool flag_{true}; bool flag_{true};
bool no_tensor_flag_{true};
std::vector<framework::LoDTensorArray *> arrays_; std::vector<framework::LoDTensorArray *> arrays_;
std::unordered_set<std::type_index> valid_types_;
std::unordered_set<framework::Variable *> no_tensor_vars_;
}; };
} // namespace details } // namespace details
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册