Commit a66ee2d5 authored by NazgulLee, committed by Jiaying Zhao

1. revert commit 2402029d; 2. only adjust memory when dims size equals 4 (#1787)

Parent a7646bb6
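Context for the change: the reverted commit had deferred buffer adjustment to a MemoryOptPass::AdjustMemory step driven by a stored shouldAdjustMemory_ flag; this commit restores the older path in which SetInput calls InitNoPersistableMemory directly, and adds the rank check from point 2 of the message. A hedged sketch of the trigger condition restored in SetInput, with illustrative shapes (the 224/160/216 numbers are examples, not from the diff):

    // product(dim) is the element count of a DDim, so the 0.9 factor means
    // "the new input is at least 10% smaller than the previous one".
    // Illustrative numbers: a previous input of {1, 3, 224, 224} has 150528
    // elements; a new input of {1, 3, 160, 160} has 76800 (~51%), so the
    // branch is taken; {1, 3, 216, 216} (~93%) would not take it.
    if (lod_mode_ && product(dim) < 0.9 * product(input_dim_last_)) {
      InitNoPersistableMemory(target);  // after this commit: a no-op unless the input is 4-D
    }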
@@ -29,6 +29,7 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "memory/t_malloc.h"
+#include "pass/memory_optimize.h"
 #include "pass/model_obfuscate.h"
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
@@ -66,9 +67,8 @@ Executor<Device, T>::Executor(const Program<Device> &program,
 #if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \
     !defined(PADDLE_MOBILE_CL)
   if (config_.memory_optimization_level != NoMemoryOptimization) {
-    memoryOpt_ = std::make_shared<pass::MemoryOptPass>();
-    (*memoryOpt_)(program_desc_.get(), program_.scope.get(),
-                  config_.memory_optimization_level);
+    pass::MemoryOptPass()(program_desc_.get(), program_.scope.get(),
+                          config_.memory_optimization_level);
   }
 #endif
   // resize feed and fetch list
@@ -296,34 +296,32 @@ static void ClearNoPersistableTensorArray(const framework::ProgramDesc *program,
 template <typename Device, typename T>
 void Executor<Device, T>::InitNoPersistableMemory(const Tensor &input_tensor) {
+  if (input_tensor.dims().size() != 4) {
+    return;
+  }
   for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
-      auto tensor = var->template GetMutable<LoDTensor>();
-      if (var_desc->Persistable()) {
-        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
-          var->template GetMutable<framework::LoDTensorArray>();
-          continue;
-        }
-      } else {
-        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
+      if (!var_desc->Persistable() &&
+          var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
+        DLOG << "InitNoPersistableMemory var " << var_desc->Name();
+        auto tensor = var->template GetMutable<LoDTensor>();
+        if (tensor->IsInitialized() && tensor->dims().size() == 4) {
+          DLOG << "var's tensor is Initialized or dims size != 4";
           DDim tensor_dim = tensor->dims();
           DDim new_dim =
               make_ddim({tensor_dim[0], tensor_dim[1], input_tensor.dims()[2],
                          input_tensor.dims()[3]});
           tensor->Resize(new_dim);
-          tensor->template mutable_data<T>();
+          tensor->template mutable_data_new<T>();
+          DLOG << "var's tensor dims " << tensor_dim;
+          DLOG << "var's tensor new dims " << new_dim;
         } else {
-          PADDLE_MOBILE_THROW_EXCEPTION("Unsupported var type `%d`",
-                                        var_desc->Type());
+          DLOG << "var's tensor is not Initialized ???";
         }
       }
     }
   }
-
-  std::shared_ptr<LoDTensor> output = GetOutput("fetch");
-  output->Resize(input_tensor.dims());
-  output->mutable_data<T>();
 }

 template <typename Device, typename T>
@@ -411,7 +409,9 @@ void Executor<Device, T>::SetInput(const Tensor &input,
   target.ShareDataWith(input);
   if (feed_indices_.size() == 1) {
     auto &dim = input.dims();
-    shouldAdjustMemory_ = (product(dim) < 0.9 * product(input_dim_last_));
+    if (lod_mode_ && product(dim) < 0.9 * product(input_dim_last_)) {
+      InitNoPersistableMemory(target);
+    }
     input_dim_has_changed_ = input_dim_last_ != dim;
     input_dim_last_ = static_cast<DDim>(dim);
   }
@@ -433,7 +433,9 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
   target.set_lod(input.lod());
   if (feed_indices_.size() == 1) {
     auto &dim = input.dims();
-    shouldAdjustMemory_ = (product(dim) < 0.9 * product(input_dim_last_));
+    if (lod_mode_ && product(dim) < 0.9 * product(input_dim_last_)) {
+      InitNoPersistableMemory(target);
+    }
     input_dim_has_changed_ = input_dim_last_ != dim;
     input_dim_last_ = static_cast<DDim>(dim);
   }
@@ -483,16 +485,7 @@ PMStatus Executor<Device, T>::Predict() {
   // clear all no persistable tensor array since write_to_array
   // is always push back a new tensor in the array
   ClearNoPersistableTensorArray(program_desc_.get(), program_.scope.get());
-  if (lod_mode_ && input_dim_has_changed_) {
-    for (int i = 0; i < ops_of_block0_.size(); ++i) {
-      auto &op_handler = ops_of_block0_[i];
-      op_handler->InferShape();
-    }
-    if (memoryOpt_ != nullptr && shouldAdjustMemory_) {
-      shouldAdjustMemory_ = false;
-      memoryOpt_->AdjustMemory();
-    }
-  }
 #ifdef PADDLE_MOBILE_PROFILE
   std::vector<ProfInfo> profile(ops_of_block0_.size());
   struct timespec ts;
@@ -503,12 +496,12 @@ PMStatus Executor<Device, T>::Predict() {
 #ifdef PADDLE_MOBILE_PROFILE
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
-    // if (lod_mode_ && input_dim_has_changed_) {
-    //   op_handler->InferShape();
-    // }
 #endif
     DLOG << i << "th, "
          << "run op: " << op_handler->Type();
+    if (lod_mode_ && input_dim_has_changed_) {
+      op_handler->InferShape();
+    }
     op_handler->Run();
 #ifdef PADDLE_MOBILE_PROFILE
     clock_gettime(CLOCK_MONOTONIC, &ts);
......
@@ -27,7 +27,6 @@ limitations under the License. */
 #include "framework/program/program.h"
 #include "framework/tensor.h"
 #include "framework/type_trait.h"
-#include "pass/memory_optimize.h"

 namespace paddle_mobile {
 namespace framework {
@@ -105,9 +104,6 @@ class Executor {
   DDim input_dim_last_;
   bool input_dim_has_changed_ = true;
-  bool shouldAdjustMemory_ = false;
-  std::shared_ptr<pass::MemoryOptPass> memoryOpt_;
 #ifdef PADDLE_MOBILE_PROFILE
   typedef typename DtypeTensorTrait<Device>::gtype ProfileTensorType;
......
@@ -104,14 +104,27 @@ class Tensor : public TensorBase {
     return *this;
   }

-  inline void mutable_data_new() {
-    if (holder_ != nullptr) {
-      PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
-      int64_t size = numel() * SizeOfType(holder_->type());
-      if (holder_->size() != size + offset_) {
-        holder_->realloc(size + offset_);
-      }
-    }
-  }
+  template <typename T>
+  inline T *mutable_data_new() {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    const kTypeId_t type = type_id<T>().hash_code();
+
+    if (holder_ != nullptr) {
+      holder_->set_type(type);
+    }
+
+    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
+    int64_t size = numel() * SizeOfType(type);
+    if (holder_ == nullptr || holder_->size() != size + offset_) {
+      if (holder_ == nullptr) {
+        holder_.reset(new PlaceholderImpl(size, type));
+      } else {
+        holder_->realloc(size);
+      }
+      offset_ = 0;
+    }
+    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
+                                 offset_);
+  }

   inline void *mutable_data(const kTypeId_t type) {
......
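Judging from the diff above, the restored templated mutable_data_new<T>() sets the holder's type and reallocates the existing holder whenever its size no longer matches the current numel (shrinking included), which is what lets InitNoPersistableMemory actually release memory after a Resize to a smaller shape. A minimal usage sketch under that assumption (paddle_mobile::framework types; the shape is illustrative only):

    // Illustrative only: shrink an intermediate LoDTensor and force its
    // holder to match the new element count.
    framework::LoDTensor t;
    t.Resize(framework::make_ddim({1, 3, 112, 112}));
    float *data = t.mutable_data_new<float>();  // allocates or reallocates 1*3*112*112 floats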
@@ -57,7 +57,6 @@ void MemoryOptPass::operator()(
     AppendBlockVars(block.get());

     reused_nodes_.clear();
-    memoryDeputies_.clear();
     // collect all not persistable variables, and accumulate
     // it's reference count
     std::stack<VarNode *> empty_var_nodes;
@@ -157,33 +156,15 @@ void MemoryOptPass::operator()(
       auto *reuse_tensor =
           reused_var->template GetMutable<framework::LoDTensor>();
       reuse_tensor->mutable_data<float>();
-      framework::Variable *deputyVar;
-      int64_t varSize = 0;
       for (const auto &node : list) {
         DLOG << node->name;
         auto *var = scope->Var(node->name);
         auto *tensor = var->template GetMutable<framework::LoDTensor>();
         tensor->ShareHolderWith(*reuse_tensor);
-        if (tensor->numel() > varSize) {
-          varSize = tensor->numel();
-          deputyVar = var;
-        }
-      }
-      if (deputyVar) {
-        memoryDeputies_.push_back(deputyVar);
       }
     }
   }
 }

-void MemoryOptPass::AdjustMemory() {
-  for (auto &deputy : memoryDeputies_) {
-    if (deputy->IsType<framework::LoDTensor>()) {
-      auto *tensor = deputy->template GetMutable<framework::LoDTensor>();
-      tensor->mutable_data_new();
-    }
-  }
-}
-
 }  // namespace pass
 }  // namespace paddle_mobile
@@ -51,14 +51,11 @@ class MemoryOptPass : public PassBase {
   VarNode *CreateNode(const std::string name);

-  void AdjustMemory();
-
  private:
   std::stack<VarNode *> analysis_nodes_;
   std::vector<std::vector<VarNode *>> reused_nodes_;
   std::unordered_map<std::string, VarNode *> created_nodes_;
   std::unordered_map<std::string, framework::VarDesc *> block_vars_;
-  std::vector<framework::Variable *> memoryDeputies_;
 };

 }  // namespace pass
......