From 0b09e61aa103daf4f42f8f1cbdc5a17199fafbc4 Mon Sep 17 00:00:00 2001
From: hjchen2
Date: Mon, 18 Mar 2019 21:25:32 +0800
Subject: [PATCH] Add memory optimize pass

---
 src/framework/executor.cpp                 |   2 +
 src/framework/program/program_desc.cpp     |   2 +-
 src/framework/program/program_desc.h       |  19 +--
 src/framework/tensor.h                     |   3 +-
 .../kernel/arm/convolution/conv_common.cpp |   2 +-
 src/pass/memory_optimize.cpp               | 134 ++++++++++++++++++
 src/pass/memory_optimize.h                 |  67 +++++++++
 7 files changed, 218 insertions(+), 11 deletions(-)
 create mode 100644 src/pass/memory_optimize.cpp
 create mode 100644 src/pass/memory_optimize.h

diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index a15c0e6b4e..750c0da540 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -28,6 +28,7 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "memory/t_malloc.h"
+#include "pass/memory_optimize.h"
 
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
@@ -62,6 +63,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
       use_optimize_ ? program_.optimizeProgram : program_.originProgram;
   PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
                         "program_desc_ should not be nullptr");
+  pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
   // resize feed and fetch list
   // should init feed and fetch variables before infer shape
   InitFeedFetchList();
diff --git a/src/framework/program/program_desc.cpp b/src/framework/program/program_desc.cpp
index b66c7a0dcf..23781fe779 100644
--- a/src/framework/program/program_desc.cpp
+++ b/src/framework/program/program_desc.cpp
@@ -46,7 +46,7 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
   }
 }
 
-void ProgramDesc::Description(std::string header) {
+void ProgramDesc::Description(std::string header) const {
 #ifdef PADDLE_MOBILE_DEBUG
   if (header.size()) {
     LOG(kLOG_INFO) << header;
diff --git a/src/framework/program/program_desc.h b/src/framework/program/program_desc.h
index 5c75c91522..f4551509ee 100644
--- a/src/framework/program/program_desc.h
+++ b/src/framework/program/program_desc.h
@@ -30,6 +30,14 @@ class ProgramDesc {
   friend class ProgramOptimize;
   explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
 
+  ProgramDesc(const ProgramDesc &program_desc) {
+    for (auto &block : program_desc.blocks_) {
+      std::shared_ptr<BlockDesc> copy_block =
+          std::make_shared<BlockDesc>(*block);
+      blocks_.push_back(copy_block);
+    }
+  }
+
   std::shared_ptr<BlockDesc> Block(size_t idx);
 
   BlockDesc *MutableBlock(size_t idx) {
@@ -40,16 +48,11 @@ class ProgramDesc {
     }
   }
 
-  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
-  ProgramDesc(const ProgramDesc &program_desc) {
-    for (auto &block : program_desc.blocks_) {
-      std::shared_ptr<BlockDesc> copy_block =
-          std::make_shared<BlockDesc>(*block);
-      blocks_.push_back(copy_block);
-    }
+  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() const {
+    return blocks_;
   }
 
-  void Description(std::string header = "");
+  void Description(std::string header = "") const;
 
  private:
   std::vector<std::shared_ptr<BlockDesc>> blocks_;
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 24f09662ea..63f074f4af 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -69,7 +69,8 @@ class Tensor : public TensorBase {
   inline Tensor &ShareDataWith(const Tensor &src) {
     src.check_memory_size();
     if (holder_.get() != src.holder_.get()) {
-      *this = src;
+      // *this = src;
+      holder_ = src.holder_;
     }
     return *this;
   }
diff --git a/src/operators/kernel/arm/convolution/conv_common.cpp b/src/operators/kernel/arm/convolution/conv_common.cpp
index 2a3a5e17e1..b0d6c4a5d1 100644
--- a/src/operators/kernel/arm/convolution/conv_common.cpp
+++ b/src/operators/kernel/arm/convolution/conv_common.cpp
@@ -52,7 +52,7 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
   } else if (depth5x5 && param->Strides()[0] == param->Strides()[1] &&
              param->Strides()[0] == 1) {
     param->ExecMode() = ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT;
-  } else if (conv3x3 && !depth3x3 &&
+  } else if (conv3x3 && param->Groups() == 1 &&
              param->Strides()[0] == param->Strides()[1] &&
              param->Dilations()[0] == param->Dilations()[1] &&
              param->Strides()[0] == 1 && param->Dilations()[0] == 1
diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp
new file mode 100644
index 0000000000..7da698866b
--- /dev/null
+++ b/src/pass/memory_optimize.cpp
@@ -0,0 +1,134 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "pass/memory_optimize.h"
+#include "framework/lod_tensor.h"
+
+namespace paddle_mobile {
+namespace pass {
+
+void MemoryOptPass::InitBlockVars(const framework::BlockDesc *block) {
+  block_vars_.clear();
+  for (const auto var : block->Vars()) {
+    block_vars_[var->Name()] = var.get();
+  }
+}
+
+bool MemoryOptPass::IsPersistable(const std::string name) {
+  const auto it = block_vars_.find(name);
+  if (it != block_vars_.end()) {
+    return it->second->Persistable();
+  }
+  return false;
+}
+
+VarNode *MemoryOptPass::CreateNode(const std::string name) {
+  auto it = created_nodes_.find(name);
+  if (it != created_nodes_.end()) {
+    ++(it->second->count);
+    return it->second;
+  }
+  VarNode *var = new VarNode;
+  var->name = name;
+  var->count = 1;
+  var->visited = false;
+  created_nodes_[name] = var;
+  return var;
+}
+
+void MemoryOptPass::operator()(const framework::ProgramDesc *program,
+                               framework::Scope *scope) {
+  const auto &blocks = program->Blocks();
+  for (const auto &block : blocks) {
+    // visit all variables in the block and store them in a map
+    InitBlockVars(block.get());
+
+    visited_nodes_.clear();
+    reused_nodes_.clear();
+    // collect all non-persistable variables and accumulate
+    // their reference counts
+    std::stack<VarNode *> empty_var_nodes;
+    analysis_nodes_.swap(empty_var_nodes);
+
+    for (const auto &op : block->Ops()) {
+      DLOG << "op_desc->Type(): " << op->Type();
+      const auto &outputs_map = op->GetOutputs();
+      for (const auto &outputs : outputs_map) {
+        for (const auto &output : outputs.second) {
+          if (!IsPersistable(output)) {
+            DLOG << "output: " << output;
+            VarNode *node = CreateNode(output);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
+      const auto &inputs_map = op->GetInputs();
+      for (const auto &inputs : inputs_map) {
+        for (const auto &input : inputs.second) {
+          if (!IsPersistable(input)) {
+            DLOG << "input: " << input;
+            VarNode *node = CreateNode(input);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
+    }
+
+    // apply the optimization
+    while (!analysis_nodes_.empty()) {
+      auto *node = analysis_nodes_.top();
+      analysis_nodes_.pop();
+      // only an unvisited node can reuse memory from other nodes
+      // with 0 count, which indicates they will not be used any more
+      if (!node->visited) {
+        bool reused = false;
+        // find a possible reuse list
+        for (auto &list : reused_nodes_) {
+          if (list.back()->count == 0) {
+            list.push_back(node);
+            reused = true;
+            break;
+          }
+        }
+        // create a new list if no reusable list is found
+        if (!reused) {
+          std::vector<VarNode *> list;
+          list.push_back(node);
+          reused_nodes_.push_back(std::move(list));
+        }
+      }
+      node->visited = true;
+      node->count -= 1;
+    }
+  }
+  // share data among all variables in the same reuse list
+  for (const auto &list : reused_nodes_) {
+    DLOG << "\n";
+    DLOG << "share data among these variables";
+    std::string name = list[0]->name;
+    auto *reused_var = scope->Var(name);
+    auto *reuse_tensor =
+        reused_var->template GetMutable<framework::LoDTensor>();
+    reuse_tensor->mutable_data<float>();
+    for (const auto &node : list) {
+      DLOG << node->name;
+      auto *var = scope->Var(node->name);
+      auto *tensor = var->template GetMutable<framework::LoDTensor>();
+      tensor->ShareDataWith(*reuse_tensor);
+    }
+  }
+}
+
+}  // namespace pass
+}  // namespace paddle_mobile
diff --git a/src/pass/memory_optimize.h b/src/pass/memory_optimize.h
new file mode 100644
index 0000000000..f4e9b6c851
--- /dev/null
+++ b/src/pass/memory_optimize.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "framework/program/program.h"
+
+namespace paddle_mobile {
+namespace pass {
+
+typedef struct {
+  std::string name;  // variable name
+  int count;         // reference count
+  bool visited;
+} VarNode;
+
+class PassBase {
+ public:
+  PassBase() {}
+  virtual ~PassBase() {}
+};
+
+// MemoryOptPass analyzes the program and reuses memory
+// between variables as much as possible
+class MemoryOptPass : public PassBase {
+ public:
+  MemoryOptPass() {}
+  virtual ~MemoryOptPass() {
+    for (auto &it : created_nodes_) {
+      delete it.second;
+    }
+  }
+
+  void operator()(const framework::ProgramDesc *program,
+                  framework::Scope *scope);
+
+  void InitBlockVars(const framework::BlockDesc *block);
+
+  bool IsPersistable(const std::string name);
+
+  VarNode *CreateNode(const std::string name);
+
+ private:
+  std::stack<VarNode *> analysis_nodes_;
+  std::vector<std::vector<VarNode *>> reused_nodes_;
+  std::unordered_map<std::string, VarNode *> created_nodes_;
+  std::unordered_map<std::string, VarNode *> visited_nodes_;
+  std::unordered_map<std::string, framework::VarDesc *> block_vars_;
+};
+
+}  // namespace pass
+}  // namespace paddle_mobile
--
GitLab
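
Reviewer note (not part of the patch): the heart of MemoryOptPass is the
reuse-list construction in operator()(). The self-contained sketch below
isolates that algorithm so it can be traced outside the framework. The Op
struct, the touch() helper, and the toy feed -> conv -> relu -> fetch graph
are inventions for this demo, not paddle-mobile APIs; only the counting and
list-reuse logic mirrors the pass.

#include <iostream>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

struct VarNode {
  std::string name;
  int count = 0;    // remaining uses
  bool visited = false;
};

struct Op {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

int main() {
  // A toy straight-line graph: feed -> conv -> relu -> fetch.
  const std::vector<Op> ops = {{{"feed"}, {"conv_out"}},
                               {{"conv_out"}, {"relu_out"}},
                               {{"relu_out"}, {"fetch"}}};

  // Pass 1: count every appearance of each variable and record the
  // appearance order on a stack (outputs before inputs, as in the pass).
  std::unordered_map<std::string, VarNode> nodes;
  std::stack<VarNode *> analysis;
  auto touch = [&](const std::string &name) {
    VarNode &n = nodes[name];  // unordered_map keeps references stable
    n.name = name;
    ++n.count;
    analysis.push(&n);
  };
  for (const auto &op : ops) {
    for (const auto &name : op.outputs) touch(name);
    for (const auto &name : op.inputs) touch(name);
  }

  // Pass 2: pop in reverse order; at its last occurrence a variable may
  // join a reuse list whose tail has no remaining uses, otherwise it
  // opens a new list (i.e. a new physical buffer).
  std::vector<std::vector<VarNode *>> reused;
  while (!analysis.empty()) {
    VarNode *node = analysis.top();
    analysis.pop();
    if (!node->visited) {
      bool placed = false;
      for (auto &list : reused) {
        if (list.back()->count == 0) {
          list.push_back(node);
          placed = true;
          break;
        }
      }
      if (!placed) reused.push_back({node});
    }
    node->visited = true;
    --node->count;
  }

  // Each list maps to one shared buffer, mirroring the final
  // ShareDataWith() loop in MemoryOptPass::operator()().
  for (size_t i = 0; i < reused.size(); ++i) {
    std::cout << "buffer " << i << ":";
    for (const auto *n : reused[i]) std::cout << ' ' << n->name;
    std::cout << '\n';
  }
  return 0;
}

Because nodes are popped in reverse order of appearance, a list whose tail
has count == 0 is a buffer every use of which lies later in the program
than the last use of the variable about to join it, so the two lifetimes
cannot overlap. For the toy graph the sketch prints two buffers,
"relu_out feed" and "fetch conv_out": four variables end up sharing two
allocations, matching what MemoryOptPass arranges via ShareDataWith().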