Unverified commit 1bf48365, authored by Wilber, committed by GitHub

[Inference] Add TryShrinkMemory interface. (#28409)

Parent 26d292b1
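For orientation, here is a minimal usage sketch of the interface this commit adds, written against the paddle_infer C++ API exercised by the new tests further down. The model directory, the input tensor name, and the include path are placeholders, not part of the change.

```cpp
// Hedged sketch: assumes a single-input model; "./model_dir" and "x" are
// placeholders (the test added below runs the same flow on the word2vec
// test model with four inputs).
#include <cstdint>
#include <iostream>
#include <vector>

#include "paddle_inference_api.h"  // header name may differ per install layout

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // placeholder model directory

  auto predictor = paddle_infer::CreatePredictor(config);

  auto input = predictor->GetInputHandle("x");  // placeholder input name
  input->Reshape({1, 4});
  std::vector<float> data{0.f, 1.f, 2.f, 3.f};
  input->CopyFromCpu(data.data());

  predictor->Run();

  // New in this commit: release unoccupied memory-pool chunks after
  // inference and report how many bytes were handed back.
  uint64_t released = predictor->TryShrinkMemory();
  std::cout << "bytes released: " << released << std::endl;
  return 0;
}
```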
......@@ -175,7 +175,10 @@ bool AnalysisPredictor::PrepareScope(
status_is_cloned_ = true;
} else {
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
scope_.reset(new paddle::framework::Scope(), [&](framework::Scope *scope) {
delete scope;
memory::Release(place_);
});
status_is_cloned_ = false;
}
sub_scope_ = &scope_->NewScope();
......@@ -591,7 +594,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
gflags.push_back("--allocator_strategy=thread_local");
process_level_allocator_enabled = false;
} else {
gflags.push_back("--allocator_strategy=naive_best_fit");
process_level_allocator_enabled = true;
}
......@@ -890,6 +892,11 @@ bool AnalysisPredictor::LoadParameters() {
return true;
}
uint64_t AnalysisPredictor::TryShrinkMemory() {
ClearIntermediateTensor();
return paddle::memory::Release(place_);
}
void AnalysisPredictor::ClearIntermediateTensor() {
PADDLE_ENFORCE_NOT_NULL(inference_program_.get(),
platform::errors::PreconditionNotMet(
......@@ -985,6 +992,8 @@ AnalysisPredictor::~AnalysisPredictor() {
mkldnn_quantizer_ = nullptr;
}
#endif
memory::Release(place_);
}
std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
......@@ -1142,6 +1151,8 @@ void Predictor::ClearIntermediateTensor() {
predictor_->ClearIntermediateTensor();
}
uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); }
int GetNumBytesOfDataType(DataType dtype) {
switch (dtype) {
case DataType::FLOAT32:
......
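A note on the PrepareScope hunk above: the plain scope_.reset(new Scope()) is replaced by a reset with a custom deleter, so that memory::Release(place_) also runs when the last owner of the scope goes away. A self-contained sketch of that shared_ptr pattern, using stand-in types rather than Paddle's:

```cpp
#include <iostream>
#include <memory>

struct Scope {};      // stand-in for framework::Scope

void ReleasePool() {  // stand-in for memory::Release(place_)
  std::cout << "releasing idle memory-pool chunks\n";
}

int main() {
  std::shared_ptr<Scope> scope;
  // Reset with a custom deleter: the extra cleanup runs exactly once,
  // when the last shared_ptr owning the Scope is destroyed.
  scope.reset(new Scope(), [](Scope* s) {
    delete s;
    ReleasePool();
  });
  scope.reset();  // the deleter fires here
  return 0;
}
```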
......@@ -193,6 +193,17 @@ class AnalysisPredictor : public PaddlePredictor {
///
void ClearIntermediateTensor();
///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only if it is not occupied.
///
/// \return The number of bytes released. It may be smaller than the
/// memory actually freed, because part of the memory is not managed
/// by the MemoryPool.
///
uint64_t TryShrinkMemory() override;
///
/// \brief Get the argument used by predictor
///
......
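The doc comment above models the pool as a list of chunks, of which only unoccupied chunks can be released, and defines the return value as the number of bytes actually handed back. The toy sketch below only illustrates that accounting; it is not Paddle's allocator.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct Chunk {
  uint64_t size;
  bool occupied;
};

// Drop every chunk that is not currently occupied and report how many
// bytes were freed; occupied chunks stay in the pool.
uint64_t TryShrink(std::vector<Chunk>* pool) {
  uint64_t released = 0;
  std::vector<Chunk> kept;
  for (const Chunk& c : *pool) {
    if (c.occupied) {
      kept.push_back(c);
    } else {
      released += c.size;
    }
  }
  pool->swap(kept);
  return released;
}

int main() {
  std::vector<Chunk> pool = {{1 << 20, true}, {4 << 20, false}, {2 << 20, false}};
  std::cout << "released " << TryShrink(&pool) << " bytes; "
            << pool.size() << " chunk(s) still occupied\n";
  return 0;
}
```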
......@@ -135,6 +135,7 @@ TEST(AnalysisPredictor, ZeroCopy) {
auto* out_data = out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
LOG(INFO) << "output_data: " << out_data;
predictor->TryShrinkMemory();
}
TEST(AnalysisPredictor, Clone) {
......@@ -253,8 +254,7 @@ class MkldnnQuantizerTest : public testing::Test {
public:
MkldnnQuantizerTest() {
AnalysisConfig config(FLAGS_dirname);
predictor.reset(new AnalysisPredictor(config));
predictor = std::move(CreatePaddlePredictor(config));
auto* predictor_p = static_cast<AnalysisPredictor*>(predictor.get());
auto qconfig = new MkldnnQuantizerConfig();
......@@ -507,3 +507,45 @@ TEST(AnalysisPredictor, bf16_pass_strategy) {
}
} // namespace paddle
namespace paddle_infer {
TEST(Predictor, Run) {
Config config;
config.SetModel(FLAGS_dirname);
auto predictor = CreatePredictor(config);
auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
auto w2 = predictor->GetInputHandle("thirdw");
auto w3 = predictor->GetInputHandle("forthw");
w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});
auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}
predictor->Run();
auto out = predictor->GetOutputHandle("fc_1.tmp_2");
PlaceType place;
int size = 0;
out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
predictor->TryShrinkMemory();
}
} // namespace paddle_infer
......@@ -60,6 +60,7 @@ TEST(paddle_inference_api, demo) {
auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> outputs;
predictor->Run({}, &outputs);
predictor->TryShrinkMemory();
}
TEST(paddle_inference_api, get_version) {
......
......@@ -319,6 +319,17 @@ class PD_INFER_DECL PaddlePredictor {
///
virtual void ClearIntermediateTensor() {}
///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only if it is not occupied.
///
/// \return The number of bytes released. It may be smaller than the
/// memory actually freed, because part of the memory is not managed
/// by the MemoryPool.
///
virtual uint64_t TryShrinkMemory() { return 0; }
/// \brief Clone an existing predictor
/// When using clone, the same network will be created,
/// and the parameters between them are shared.
......
......@@ -224,6 +224,17 @@ class PD_INFER_DECL Predictor {
/// \brief Clear the intermediate tensors of the predictor
void ClearIntermediateTensor();
///
/// \brief Release all temporary tensors to shrink the memory pool.
/// The memory pool is treated as a list of chunks; a chunk can be
/// released only if it is not occupied.
///
/// \return The number of bytes released. It may be smaller than the
/// memory actually freed, because part of the memory is not managed
/// by the MemoryPool.
///
uint64_t TryShrinkMemory();
private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
......
......@@ -566,6 +566,7 @@ void BindAnalysisPredictor(py::module *m) {
.def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
.def("clear_intermediate_tensor",
&AnalysisPredictor::ClearIntermediateTensor)
.def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
.def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
.def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
.def("prepare_argument", &AnalysisPredictor::PrepareArgument)
......@@ -593,6 +594,7 @@ void BindPaddleInferPredictor(py::module *m) {
.def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
.def("run", &paddle_infer::Predictor::Run)
.def("clone", &paddle_infer::Predictor::Clone)
.def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
.def("clear_intermediate_tensor",
&paddle_infer::Predictor::ClearIntermediateTensor);
}
......
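The two pybind hunks above expose the new method to Python as try_shrink_memory on both predictor classes. For readers unfamiliar with the binding style, here is a self-contained pybind11 sketch of the same .def pattern; the class and module names are invented for illustration.

```cpp
#include <cstdint>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Invented stand-in for a predictor-like class.
class DemoPredictor {
 public:
  uint64_t TryShrinkMemory() { return 0; }
};

PYBIND11_MODULE(demo_module, m) {
  py::class_<DemoPredictor>(m, "DemoPredictor")
      .def(py::init<>())
      // snake_case Python name bound to the CamelCase C++ method,
      // matching the naming convention used in the diff above.
      .def("try_shrink_memory", &DemoPredictor::TryShrinkMemory);
}
```

From Python this would be called as demo_module.DemoPredictor().try_shrink_memory(), mirroring predictor.try_shrink_memory() on the real bindings.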