From ba8d089d18d457d1aa30b6c7741e0a91905defe0 Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Tue, 19 Mar 2019 12:15:12 +0800 Subject: [PATCH] Refine memory optimize --- src/framework/executor.cpp | 8 ++++---- src/framework/tensor.h | 22 ++++++++++++++++++++-- src/framework/tensor_base.h | 2 ++ src/pass/memory_optimize.cpp | 18 ++++++++++++------ src/pass/memory_optimize.h | 1 - 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 750c0da540..f1d50d6e99 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "framework/executor.h" #include #include #include #include "common/enforce.h" #include "common/log.h" +#include "memory/t_malloc.h" #include "framework/context.h" #include "framework/framework.pb-c.h" #include "framework/lod_tensor.h" @@ -27,9 +27,8 @@ limitations under the License. */ #include "framework/program/var_desc.h" #include "framework/scope.h" #include "framework/tensor.h" -#include "memory/t_malloc.h" +#include "framework/executor.h" #include "pass/memory_optimize.h" - #ifdef PADDLE_MOBILE_CL #include "framework/cl/cl_image.h" #endif @@ -217,6 +216,7 @@ void Executor::InitMemory() { var->template GetMutable(); continue; } + DLOG << "init persistable var: " << var_desc->Name(); char *origin_data = ReadFileToBuff(program_.model_path + "/" + var_desc->Name()); char *data = origin_data; @@ -329,7 +329,6 @@ bool Executor::varInputMemory( if (type == VARTYPE_TYPE_LOD_TENSOR) { auto data_type = var_desc->Tensor_desc().DataType(); framework::LoDTensor *tensor = var->template GetMutable(); - tensor->mutable_data(TypeId(data_type)); } else if (type == VARTYPE_TYPE_STEP_SCOPES) { std::vector *step_scopes = var->template GetMutable>(); @@ -465,6 +464,7 @@ PMStatus Executor::Predict() { clock_gettime(CLOCK_MONOTONIC, &ts); profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif + DLOG << "run op: " << op_handler->Type(); if (lod_mode_) { op_handler->InferShape(); } diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 63f074f4af..8b56dc77c9 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -28,6 +28,8 @@ limitations under the License. */ #include "framework/tensor_base.h" #include "memory/t_malloc.h" +#include + namespace paddle_mobile { namespace framework { @@ -69,7 +71,6 @@ class Tensor : public TensorBase { inline Tensor &ShareDataWith(const Tensor &src) { src.check_memory_size(); if (holder_.get() != src.holder_.get()) { - // *this = src; holder_ = src.holder_; } return *this; @@ -82,7 +83,13 @@ class Tensor : public TensorBase { PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.") int64_t size = numel() * SizeOfType(type); if (holder_ == nullptr || holder_->size() < size + offset_) { - holder_.reset(new PlaceholderImpl(size, type)); + if (holder_ == nullptr) { + std::cout << "reset holder... size " << size << std::endl; + holder_.reset(new PlaceholderImpl(size, type)); + } else { + std::cout << "resize holder... size " << size << std::endl; + holder_->resize(size); + } offset_ = 0; } return reinterpret_cast( @@ -181,6 +188,7 @@ class Tensor : public TensorBase { : ptr_(static_cast(memory::Alloc(size)), memory::PODDeleter()), size_(size), + capatity_(size), type_(type) { PADDLE_MOBILE_ENFORCE(ptr_ != nullptr, "Insufficient memory to allocation"); @@ -194,11 +202,21 @@ class Tensor : public TensorBase { virtual void set_type(std::type_index type) { type_ = type; } + virtual void resize(size_t size) { + if (size > capatity_) { + capatity_ = size; + ptr_.reset(static_cast(memory::Alloc(capatity_))); + } + size_ = size; + } + std::unique_ptr> ptr_; /*! the size of memory block. */ size_t size_; + size_t capatity_; + /* the current type of memory */ std::type_index type_; }; diff --git a/src/framework/tensor_base.h b/src/framework/tensor_base.h index b41d7786c1..e5ab7793c0 100644 --- a/src/framework/tensor_base.h +++ b/src/framework/tensor_base.h @@ -117,6 +117,8 @@ class TensorBase { virtual std::type_index type() const = 0; virtual void set_type(std::type_index type) = 0; + + virtual void resize(size_t size) = 0; }; /** diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp index 7da698866b..68c7d442f8 100644 --- a/src/pass/memory_optimize.cpp +++ b/src/pass/memory_optimize.cpp @@ -54,7 +54,6 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, // access all variables in block, and stored in map InitBlockVars(block.get()); - visited_nodes_.clear(); reused_nodes_.clear(); // collect all not persistable variables, and accumulate // it's reference count @@ -63,8 +62,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, for (const auto &op : block->Ops()) { DLOG << "op_desc->Type(): " << op->Type(); - const auto &outputs_map = op->GetOutputs(); - for (const auto &outputs : outputs_map) { + for (const auto &outputs : op->GetOutputs()) { for (const auto &output : outputs.second) { if (!IsPersistable(output)) { DLOG << "output: " << output; @@ -73,8 +71,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, } } } - const auto &inputs_map = op->GetInputs(); - for (const auto &inputs : inputs_map) { + for (const auto &inputs : op->GetInputs()) { for (const auto &input : inputs.second) { if (!IsPersistable(input)) { DLOG << "input: " << input; @@ -83,6 +80,15 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, } } } + for (const auto &outputs : op->GetOutputs()) { + for (const auto &output : outputs.second) { + if (!IsPersistable(output)) { + DLOG << "output: " << output; + VarNode *node = CreateNode(output); + analysis_nodes_.push(node); + } + } + } } // apply optimize @@ -115,7 +121,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, // shared data within all variables in the same reused list for (const auto &list : reused_nodes_) { DLOG << "\n"; - DLOG << "share data within these variables"; + DLOG << "share memory within these variables"; std::string name = list[0]->name; auto *reused_var = scope->Var(name); auto *reuse_tensor = diff --git a/src/pass/memory_optimize.h b/src/pass/memory_optimize.h index f4e9b6c851..466af72e46 100644 --- a/src/pass/memory_optimize.h +++ b/src/pass/memory_optimize.h @@ -59,7 +59,6 @@ class MemoryOptPass : public PassBase { std::stack analysis_nodes_; std::vector> reused_nodes_; std::unordered_map created_nodes_; - std::unordered_map visited_nodes_; std::unordered_map block_vars_; }; -- GitLab