提交 0b09e61a 编写于 作者: H hjchen2

Add memory optimize pass

上级 ba7458fa
......@@ -28,6 +28,7 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "memory/t_malloc.h"
#include "pass/memory_optimize.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
......@@ -62,6 +63,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
use_optimize_ ? program_.optimizeProgram : program_.originProgram;
PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
"program_desc_ should not be nullptr");
pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
// resize feed and fetch list
// should init feed and fetch variables before infer shape
InitFeedFetchList();
......
......@@ -46,7 +46,7 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
}
}
void ProgramDesc::Description(std::string header) {
void ProgramDesc::Description(std::string header) const {
#ifdef PADDLE_MOBILE_DEBUG
if (header.size()) {
LOG(kLOG_INFO) << header;
......
......@@ -30,6 +30,14 @@ class ProgramDesc {
friend class ProgramOptimize;
explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) {
std::shared_ptr<BlockDesc> copy_block =
std::make_shared<BlockDesc>(*block);
blocks_.push_back(copy_block);
}
}
std::shared_ptr<BlockDesc> Block(size_t idx);
BlockDesc *MutableBlock(size_t idx) {
......@@ -40,16 +48,11 @@ class ProgramDesc {
}
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) {
std::shared_ptr<BlockDesc> copy_block =
std::make_shared<BlockDesc>(*block);
blocks_.push_back(copy_block);
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() const {
return blocks_;
}
void Description(std::string header = "");
void Description(std::string header = "") const;
private:
std::vector<std::shared_ptr<BlockDesc>> blocks_;
......
......@@ -69,7 +69,8 @@ class Tensor : public TensorBase {
inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size();
if (holder_.get() != src.holder_.get()) {
*this = src;
// *this = src;
holder_ = src.holder_;
}
return *this;
}
......
......@@ -52,7 +52,7 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
} else if (depth5x5 && param->Strides()[0] == param->Strides()[1] &&
param->Strides()[0] == 1) {
param->ExecMode() = ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT;
} else if (conv3x3 && !depth3x3 &&
} else if (conv3x3 && param->Groups() == 1 &&
param->Strides()[0] == param->Strides()[1] &&
param->Dilations()[0] == param->Dilations()[1] &&
param->Strides()[0] == 1 && param->Dilations()[0] == 1
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "pass/memory_optimize.h"
#include "framework/lod_tensor.h"
namespace paddle_mobile {
namespace pass {
// Rebuilds block_vars_ so that it maps the name of every variable returned
// by block->Vars() to that variable's descriptor. Entries left over from a
// previous call are discarded first.
//
// The map stores the raw pointers obtained via get(); it does not take
// ownership of the descriptors.
void MemoryOptPass::InitBlockVars(const framework::BlockDesc *block) {
  block_vars_.clear();
  // Bind each element by reference. Iterating by value copied the smart
  // pointer (presumably a std::shared_ptr, i.e. a ref-count round trip) once
  // per variable for no benefit.
  for (const auto &var : block->Vars()) {
    block_vars_[var->Name()] = var.get();
  }
}
// Reports whether the variable called `name` is persistable, according to
// the descriptors currently held in block_vars_. A name that is not present
// in block_vars_ is reported as not persistable.
bool MemoryOptPass::IsPersistable(const std::string name) {
  const auto found = block_vars_.find(name);
  if (found == block_vars_.end()) {
    return false;
  }
  return found->second->Persistable();
}
// Returns the VarNode registered under `name` in created_nodes_.
//
// The first request for a name allocates a node with count == 1 and
// visited == false and registers it; every later request increments the
// existing node's count and returns that same node. The nodes are released
// by ~MemoryOptPass(), so callers must not delete the returned pointer.
VarNode *MemoryOptPass::CreateNode(const std::string name) {
  const auto existing = created_nodes_.find(name);
  if (existing == created_nodes_.end()) {
    VarNode *fresh = new VarNode;
    fresh->name = name;
    fresh->count = 1;
    fresh->visited = false;
    created_nodes_[name] = fresh;
    return fresh;
  }
  VarNode *known = existing->second;
  known->count += 1;
  return known;
}
// Runs the memory-reuse analysis over every block of `program` and applies
// the result to the variables held in `scope`.
//
// For each block:
//   1. Every non-persistable output and input name of every op is pushed
//      onto analysis_nodes_; CreateNode() bumps the node's count once per
//      occurrence.
//   2. The stack is drained, so occurrences are seen in reverse push order.
//      A node popped for the first time joins the first reuse list whose
//      last member has count == 0 (no occurrence of it is left on the
//      stack), or starts a new list when no such list exists.
//   3. Every variable in a reuse list is made to share the tensor storage
//      of the list's first variable.
//
// Step 3 runs inside the block loop. It previously ran once after the loop,
// but reused_nodes_ is cleared at the top of every iteration, so the reuse
// lists of every block except the last were discarded without being applied.
//
// NOTE(review): VarNodes (count / visited) persist across blocks, while
// InitBlockVars() replaces block_vars_ per block, so a name declared only in
// another block is treated as non-persistable here. Confirm this is intended
// for multi-block programs.
void MemoryOptPass::operator()(const framework::ProgramDesc *program,
                               framework::Scope *scope) {
  const auto &blocks = program->Blocks();
  for (const auto &block : blocks) {
    // Index all variables of this block by name.
    InitBlockVars(block.get());
    visited_nodes_.clear();
    reused_nodes_.clear();

    // Collect all non-persistable variables and accumulate their reference
    // counts. Start from an empty stack for every block.
    std::stack<VarNode *> empty_var_nodes;
    analysis_nodes_.swap(empty_var_nodes);
    for (const auto &op : block->Ops()) {
      DLOG << "op_desc->Type(): " << op->Type();
      const auto &outputs_map = op->GetOutputs();
      for (const auto &outputs : outputs_map) {
        for (const auto &output : outputs.second) {
          if (!IsPersistable(output)) {
            DLOG << "output: " << output;
            VarNode *node = CreateNode(output);
            analysis_nodes_.push(node);
          }
        }
      }
      const auto &inputs_map = op->GetInputs();
      for (const auto &inputs : inputs_map) {
        for (const auto &input : inputs.second) {
          if (!IsPersistable(input)) {
            DLOG << "input: " << input;
            VarNode *node = CreateNode(input);
            analysis_nodes_.push(node);
          }
        }
      }
    }

    // Build the reuse lists.
    while (!analysis_nodes_.empty()) {
      auto *node = analysis_nodes_.top();
      analysis_nodes_.pop();
      // Only a node that has not been visited yet may be appended to a
      // list, and only behind a node whose count has dropped to 0, which
      // indicates that node has no occurrences left to process.
      if (!node->visited) {
        bool reused = false;
        // Look for a possible reuse list.
        for (auto &list : reused_nodes_) {
          if (list.back()->count == 0) {
            list.push_back(node);
            reused = true;
            break;
          }
        }
        // Start a new list when no existing list can take the node.
        if (!reused) {
          reused_nodes_.push_back(std::vector<VarNode *>{node});
        }
      }
      node->visited = true;
      node->count -= 1;
    }

    // Share data between all variables of the same reuse list. Lists are
    // never empty: each one is created holding its first node.
    for (const auto &list : reused_nodes_) {
      DLOG << "\n";
      DLOG << "share data within these variables";
      auto *reused_var = scope->Var(list.front()->name);
      auto *reuse_tensor =
          reused_var->template GetMutable<framework::LoDTensor>();
      // Presumably makes sure the representative tensor owns a float buffer
      // before the others are pointed at it - TODO confirm against Tensor.
      reuse_tensor->mutable_data<float>();
      for (const auto &node : list) {
        DLOG << node->name;
        auto *var = scope->Var(node->name);
        auto *tensor = var->template GetMutable<framework::LoDTensor>();
        tensor->ShareDataWith(*reuse_tensor);
      }
    }
  }
}
} // namespace pass
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>
#include "framework/program/program.h"
namespace paddle_mobile {
namespace pass {
// Per-variable bookkeeping record used by the memory optimize pass.
struct VarNode {
  // Variable name.
  std::string name;
  // Reference count.
  int count;
  // Visited flag.
  bool visited;
};
// Common base class for passes. It carries no state; the virtual destructor
// lets a derived pass be destroyed through a PassBase pointer.
class PassBase {
 public:
  PassBase() = default;
  virtual ~PassBase() = default;
};
// MemoryOptPass will analyze the program, and reuse memory between
// variables as much as possible
class MemoryOptPass : public PassBase {
public:
MemoryOptPass() {}
virtual ~MemoryOptPass() {
for (auto &it : created_nodes_) {
delete it.second;
}
}
void operator()(const framework::ProgramDesc *program,
framework::Scope *scope);
void InitBlockVars(const framework::BlockDesc *block);
bool IsPersistable(const std::string name);
VarNode *CreateNode(const std::string name);
private:
std::stack<VarNode *> analysis_nodes_;
std::vector<std::vector<VarNode *>> reused_nodes_;
std::unordered_map<std::string, VarNode *> created_nodes_;
std::unordered_map<std::string, VarNode *> visited_nodes_;
std::unordered_map<std::string, framework::VarDesc *> block_vars_;
};
} // namespace pass
} // namespace paddle_mobile
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册