Unverified commit 1bf48365, authored by Wilber, committed by GitHub

[Inference] Add TryShrinkMemory interface. (#28409)

Parent 26d292b1
@@ -175,7 +175,10 @@ bool AnalysisPredictor::PrepareScope(
     status_is_cloned_ = true;
   } else {
     paddle::framework::InitDevices(false);
-    scope_.reset(new paddle::framework::Scope());
+    scope_.reset(new paddle::framework::Scope(), [&](framework::Scope *scope) {
+      delete scope;
+      memory::Release(place_);
+    });
     status_is_cloned_ = false;
   }
   sub_scope_ = &scope_->NewScope();
@@ -591,7 +594,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     gflags.push_back("--allocator_strategy=thread_local");
     process_level_allocator_enabled = false;
   } else {
-    gflags.push_back("--allocator_strategy=naive_best_fit");
     process_level_allocator_enabled = true;
   }
@@ -890,6 +892,11 @@ bool AnalysisPredictor::LoadParameters() {
   return true;
 }
 
+uint64_t AnalysisPredictor::TryShrinkMemory() {
+  ClearIntermediateTensor();
+  return paddle::memory::Release(place_);
+}
+
 void AnalysisPredictor::ClearIntermediateTensor() {
   PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
                           platform::errors::PreconditionNotMet(
@@ -985,6 +992,8 @@ AnalysisPredictor::~AnalysisPredictor() {
     mkldnn_quantizer_ = nullptr;
   }
 #endif
+
+  memory::Release(place_);
 }
 
 std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
@@ -1142,6 +1151,8 @@ void Predictor::ClearIntermediateTensor() {
   predictor_->ClearIntermediateTensor();
 }
 
+uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
+
 int GetNumBytesOfDataType(DataType dtype) {
   switch (dtype) {
     case DataType::FLOAT32:
......
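The PrepareScope change above attaches a custom deleter to the shared scope: when the last owner drops the scope, the deleter first destroys the scope (and the tensors it holds) and then calls memory::Release(place_) to hand unoccupied memory-pool chunks back to the system. Below is a minimal, self-contained sketch of the same shared_ptr-with-deleter idiom; Pool, ReleaseIdle, and Scope here are stand-ins for illustration, not Paddle APIs.

#include <iostream>
#include <memory>

// Stand-in for the allocator's memory pool; in the diff above this role is
// played by memory::Release(place_), which frees unoccupied pool chunks.
struct Pool {
  void ReleaseIdle() { std::cout << "idle chunks returned\n"; }
};

struct Scope {};  // stand-in for framework::Scope

int main() {
  Pool pool;
  // Same idiom as PrepareScope: the deleter destroys the scope first
  // (dropping whatever it owns), then shrinks the pool.
  std::shared_ptr<Scope> scope(new Scope, [&pool](Scope *s) {
    delete s;
    pool.ReleaseIdle();
  });
  scope.reset();  // runs the deleter: delete, then ReleaseIdle()
  return 0;
}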
@@ -193,6 +193,17 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   void ClearIntermediateTensor();
 
+  ///
+  /// \brief Release all temporary tensors to shrink the size of the memory
+  /// pool. The memory pool is considered to be composed of a list of chunks;
+  /// if a chunk is not occupied, it can be released.
+  ///
+  /// \return Number of bytes released. It may be smaller than the memory
+  /// actually freed, because part of the memory is not managed by the
+  /// MemoryPool.
+  ///
+  uint64_t TryShrinkMemory() override;
+
   ///
   /// \brief Get the argument used by predictor
   ///
......
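As documented above, TryShrinkMemory() clears the predictor's intermediate tensors and then releases the unoccupied chunks of the memory pool, returning the number of bytes given back. A hedged usage sketch for a long-running process follows; RunAndShrink is a hypothetical helper, and the header path may differ by install layout.

#include <cstdint>
#include <iostream>

#include "paddle_inference_api.h"  // paddle_infer::Predictor (path may vary)

// Run one request, then return idle memory-pool chunks to the system so the
// process footprint drops between requests.
void RunAndShrink(paddle_infer::Predictor *predictor) {
  // ... fill the input handles here, as in TEST(Predictor, Run) below ...
  predictor->Run();
  uint64_t released = predictor->TryShrinkMemory();
  std::cout << "released " << released << " bytes from the memory pool\n";
}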
@@ -135,6 +135,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
   auto* out_data = out->data<float>(&place, &size);
   LOG(INFO) << "output size: " << size / sizeof(float);
   LOG(INFO) << "output_data: " << out_data;
+  predictor->TryShrinkMemory();
 }
 
 TEST(AnalysisPredictor, Clone) {
@@ -253,8 +254,7 @@ class MkldnnQuantizerTest : public testing::Test {
  public:
   MkldnnQuantizerTest() {
     AnalysisConfig config(FLAGS_dirname);
-
-    predictor.reset(new AnalysisPredictor(config));
+    predictor = std::move(CreatePaddlePredictor(config));
     auto* predictor_p = static_cast<AnalysisPredictor*>(predictor.get());
 
     auto qconfig = new MkldnnQuantizerConfig();
@@ -507,3 +507,45 @@ TEST(AnalysisPredictor, bf16_pass_strategy) {
 }
 
 }  // namespace paddle
+
+namespace paddle_infer {
+
+TEST(Predictor, Run) {
+  Config config;
+  config.SetModel(FLAGS_dirname);
+
+  auto predictor = CreatePredictor(config);
+
+  auto w0 = predictor->GetInputHandle("firstw");
+  auto w1 = predictor->GetInputHandle("secondw");
+  auto w2 = predictor->GetInputHandle("thirdw");
+  auto w3 = predictor->GetInputHandle("forthw");
+
+  w0->Reshape({4, 1});
+  w1->Reshape({4, 1});
+  w2->Reshape({4, 1});
+  w3->Reshape({4, 1});
+
+  auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
+  auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
+  auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
+  auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
+
+  for (int i = 0; i < 4; i++) {
+    w0_data[i] = i;
+    w1_data[i] = i;
+    w2_data[i] = i;
+    w3_data[i] = i;
+  }
+
+  predictor->Run();
+
+  auto out = predictor->GetOutputHandle("fc_1.tmp_2");
+  PlaceType place;
+  int size = 0;
+  out->data<float>(&place, &size);
+  LOG(INFO) << "output size: " << size / sizeof(float);
+  predictor->TryShrinkMemory();
+}
+
+}  // namespace paddle_infer
@@ -60,6 +60,7 @@ TEST(paddle_inference_api, demo) {
   auto predictor = CreatePaddlePredictor(config);
   std::vector<PaddleTensor> outputs;
   predictor->Run({}, &outputs);
+  predictor->TryShrinkMemory();
 }
 
 TEST(paddle_inference_api, get_version) {
TEST(paddle_inference_api, get_version) { TEST(paddle_inference_api, get_version) {
......
@@ -319,6 +319,17 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual void ClearIntermediateTensor() {}
 
+  ///
+  /// \brief Release all temporary tensors to shrink the size of the memory
+  /// pool. The memory pool is considered to be composed of a list of chunks;
+  /// if a chunk is not occupied, it can be released.
+  ///
+  /// \return Number of bytes released. It may be smaller than the memory
+  /// actually freed, because part of the memory is not managed by the
+  /// MemoryPool.
+  ///
+  virtual uint64_t TryShrinkMemory() { return 0; }
+
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
   /// and the parameters between them are shared.
......
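The base-class method above is virtual with a default body that returns 0, so existing PaddlePredictor implementations that do not manage a memory pool keep compiling and simply report that nothing was released; only pool-backed predictors such as AnalysisPredictor override it. Below is a simplified sketch of that design choice; PredictorBase and PoolBackedPredictor are stand-ins, not the real classes.

#include <cstdint>
#include <iostream>

// Simplified stand-in for the base predictor: the new hook is virtual with a
// default body, so subclasses without a memory pool need no change.
struct PredictorBase {
  virtual ~PredictorBase() = default;
  virtual uint64_t TryShrinkMemory() { return 0; }
};

// Hypothetical subclass that does own a pool and overrides the hook.
struct PoolBackedPredictor : PredictorBase {
  uint64_t TryShrinkMemory() override { return 4096; /* bytes released */ }
};

int main() {
  PredictorBase legacy;        // unchanged subclass behaviour
  PoolBackedPredictor pooled;  // overriding subclass
  std::cout << legacy.TryShrinkMemory() << " " << pooled.TryShrinkMemory()
            << std::endl;  // prints "0 4096"
  return 0;
}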
@@ -224,6 +224,17 @@ class PD_INFER_DECL Predictor {
   /// \brief Clear the intermediate tensors of the predictor
   void ClearIntermediateTensor();
 
+  ///
+  /// \brief Release all temporary tensors to shrink the size of the memory
+  /// pool. The memory pool is considered to be composed of a list of chunks;
+  /// if a chunk is not occupied, it can be released.
+  ///
+  /// \return Number of bytes released. It may be smaller than the memory
+  /// actually freed, because part of the memory is not managed by the
+  /// MemoryPool.
+  ///
+  uint64_t TryShrinkMemory();
+
  private:
   std::unique_ptr<paddle::PaddlePredictor> predictor_;
 };
......
@@ -566,6 +566,7 @@ void BindAnalysisPredictor(py::module *m) {
       .def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
       .def("clear_intermediate_tensor",
            &AnalysisPredictor::ClearIntermediateTensor)
+      .def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
       .def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
       .def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
       .def("prepare_argument", &AnalysisPredictor::PrepareArgument)
@@ -593,6 +594,7 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
       .def("run", &paddle_infer::Predictor::Run)
       .def("clone", &paddle_infer::Predictor::Clone)
+      .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
       .def("clear_intermediate_tensor",
            &paddle_infer::Predictor::ClearIntermediateTensor);
 }
......