From 5724eab23a4684e79f8d40eacb8a9ccf4e1920a8 Mon Sep 17 00:00:00 2001
From: Yanzhan Yang
Date: Mon, 1 Jul 2019 10:36:40 +0800
Subject: [PATCH] support creating tensor with raw pointer. (#1714)

* support creating tensor with raw pointer.

* fix style

* fix fpga compilation error
---
 src/common/types.h             |  8 ++++-
 src/framework/executor.cpp     |  5 +--
 src/framework/tensor.h         | 23 ++++++++++++++
 src/pass/memory_optimize.cpp   | 33 +++++++++++++++++---
 src/pass/memory_optimize.h     |  3 +-
 test/net/test_net.cpp          | 56 +++++++++++++++++++++++-----------
 tools/python/fluidtools/run.py |  2 ++
 7 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/src/common/types.h b/src/common/types.h
index 4d9f9b3e31..bfffe91575 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -132,9 +132,15 @@ enum PowerMode {
   AUTO = 4,  // scheduled by system
 };
 
+enum MemoryOptimizationLevel {
+  NoMemoryOptimization = 0,
+  MemoryOptimizationWithoutFeeds = 1,
+  FullMemoryOptimization = 2,
+};
+
 struct PaddleMobileConfigInternal {
   bool load_when_predict = false;
-  bool enable_memory_optimization = true;
+  MemoryOptimizationLevel memory_optimization_level = FullMemoryOptimization;
 };
 
 extern const char *G_OP_TYPE_CONV;
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 47ee22f323..296f8215b3 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
                               "program_desc_ should not be nullptr");
 #if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \
     !defined(PADDLE_MOBILE_CL)
-  if (config_.enable_memory_optimization) {
-    pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
+  if (config_.memory_optimization_level != NoMemoryOptimization) {
+    pass::MemoryOptPass()(program_desc_.get(), program_.scope.get(),
+                          config_.memory_optimization_level);
   }
 #endif
   // resize feed and fetch list
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index a59a252c95..5008a50bcc 100755
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -57,6 +57,20 @@ class Tensor : public TensorBase {
     }
   }
 
+  template <typename T>
+  Tensor(T *input, DDim ddim) {
+    // PADDLE_MOBILE_ENFORCE(
+    //     (sizeof(input) / sizeof(input[0])) == framework::product(ddim),
+    //     "input vector's length should be equal to tensor's length");
+
+    Resize(ddim);
+    auto type = type_id<T>().hash_code();
+    int64_t size = numel() * SizeOfType(type);
+    holder_.reset(new PlaceholderImpl(size, type, (uint8_t *)input));
+    holder_->set_type(type);
+    offset_ = 0;
+  }
+
   Tensor(const Tensor &inTensor) {
     this->dims_ = inTensor.dims_;
     this->holder_ = inTensor.holder_;
@@ -203,6 +217,15 @@ class Tensor : public TensorBase {
                           "Insufficient memory to allocation");
     }
 
+    PlaceholderImpl(size_t size, const kTypeId_t type, uint8_t *ptr)
+        : ptr_(ptr, memory::PODDeleter<uint8_t>()),
+          size_(size),
+          capatity_(size),
+          type_(type) {
+      PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
+                            "Insufficient memory to allocation");
+    }
+
     virtual size_t size() const { return size_; }
 
     virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp
index 96a610151c..3de5021bc9 100644
--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "pass/memory_optimize.h"
 #include "framework/lod_tensor.h"
+#include <algorithm>
 
 namespace paddle_mobile {
 namespace pass {
@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) {
   return var;
 }
 
-void MemoryOptPass::operator()(const framework::ProgramDesc *program,
-                               framework::Scope *scope) {
+void MemoryOptPass::operator()(
+    const framework::ProgramDesc *program, framework::Scope *scope,
+    MemoryOptimizationLevel memory_optimization_level) {
   const auto &blocks = program->Blocks();
   for (const auto &block : blocks) {
     // access all variables in each block
@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
     std::stack<VarNode *> empty_var_nodes;
     analysis_nodes_.swap(empty_var_nodes);
 
+    std::vector<std::string> exclude_var_names;
+    for (const auto &op : block->Ops()) {
+      for (const auto &inputs : op->GetInputs()) {
+        for (const auto &input : inputs.second) {
+          if (!IsPersistable(input)) {
+            if (memory_optimization_level == MemoryOptimizationWithoutFeeds) {
+              if (op->Type() == "feed") {
+                exclude_var_names.push_back(input);
+              }
+            }
+          }
+        }
+      }
+    }
+
     std::vector<VarNode *> fetch_var_nodes;
     for (const auto &op : block->Ops()) {
       DLOG << "op_desc->Type(): " << op->Type();
       for (const auto &outputs : op->GetOutputs()) {
         for (const auto &output : outputs.second) {
-          if (!IsPersistable(output)) {
+          if (!IsPersistable(output) &&
+              std::find(exclude_var_names.begin(), exclude_var_names.end(),
+                        output) == exclude_var_names.end()) {
             DLOG << "output: " << output;
             VarNode *node = CreateNode(output);
             analysis_nodes_.push(node);
@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
       }
       for (const auto &inputs : op->GetInputs()) {
         for (const auto &input : inputs.second) {
-          if (!IsPersistable(input)) {
+          if (!IsPersistable(input) &&
+              std::find(exclude_var_names.begin(), exclude_var_names.end(),
+                        input) == exclude_var_names.end()) {
             DLOG << "input: " << input;
             VarNode *node = CreateNode(input);
             analysis_nodes_.push(node);
@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
       }
       for (const auto &outputs : op->GetOutputs()) {
         for (const auto &output : outputs.second) {
-          if (!IsPersistable(output)) {
+          if (!IsPersistable(output) &&
+              std::find(exclude_var_names.begin(), exclude_var_names.end(),
+                        output) == exclude_var_names.end()) {
             DLOG << "output: " << output;
             VarNode *node = CreateNode(output);
             analysis_nodes_.push(node);
diff --git a/src/pass/memory_optimize.h b/src/pass/memory_optimize.h
index 116100af0b..991419dc33 100644
--- a/src/pass/memory_optimize.h
+++ b/src/pass/memory_optimize.h
@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase {
   }
 
   void operator()(const framework::ProgramDesc *program,
-                  framework::Scope *scope);
+                  framework::Scope *scope,
+                  MemoryOptimizationLevel memory_optimization_level);
 
   void AppendBlockVars(const framework::BlockDesc *block);
 
diff --git a/test/net/test_net.cpp b/test/net/test_net.cpp
index be7c21db7b..59a0e63d6e 100644
--- a/test/net/test_net.cpp
+++ b/test/net/test_net.cpp
@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) {
   bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
   arg_index++;
   paddle_mobile::PaddleMobileConfigInternal config;
-  config.enable_memory_optimization = enable_memory_optimization;
+  config.memory_optimization_level = enable_memory_optimization
+                                         ? MemoryOptimizationWithoutFeeds
+                                         : NoMemoryOptimization;
   paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> paddle_mobile(config);
   paddle_mobile.SetThreadNum(1);
 
@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) {
                              fuse, false, 1, true)) {
     auto time2 = time();
     std::cout << "auto-test"
-              << " load-time-cost :" << time_diff(time1, time1) << "ms"
+              << " load-time-cost :" << time_diff(time1, time2) << "ms"
               << std::endl;
 
-    std::vector<float> input_data;
+    float input_data_array[size];
     std::ifstream in("input.txt", std::ios::in);
     for (int i = 0; i < size; i++) {
       float num;
       in >> num;
-      input_data.push_back(num);
+      input_data_array[i] = num;
     }
     in.close();
 
-    paddle_mobile::framework::LoDTensor input_tensor;
+    auto time3 = time();
+    // std::vector<float> input_data;
+    // for (int i = 0; i < size; i++) {
+    //   float num = input_data_array[i];
+    //   input_data.push_back(num);
+    // }
+    // paddle_mobile::framework::Tensor input_tensor(input_data,
+    // paddle_mobile::framework::make_ddim(dims));
+    paddle_mobile::framework::Tensor input_tensor(
+        input_data_array, paddle_mobile::framework::make_ddim(dims));
+    auto time4 = time();
+    std::cout << "auto-test"
+              << " preprocess-time-cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+
+    paddle_mobile::framework::LoDTensor input_lod_tensor;
     if (is_lod) {
-      input_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
-      input_tensor.set_lod(lod);
-      auto *tensor_data = input_tensor.mutable_data<float>();
+      input_lod_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
+      input_lod_tensor.set_lod(lod);
+      auto *tensor_data = input_lod_tensor.mutable_data<float>();
       for (int i = 0; i < size; i++) {
-        tensor_data[i] = input_data[i];
+        tensor_data[i] = input_data_array[i];
       }
     }
 
     // warm up 10 times
     for (int i = 0; i < 10; i++) {
       if (is_lod) {
-        auto out = paddle_mobile.Predict(input_tensor);
+        auto out = paddle_mobile.Predict(input_lod_tensor);
       } else {
-        auto out = paddle_mobile.Predict(input_data, dims);
+        paddle_mobile.Feed(var_names[0], input_tensor);
+        paddle_mobile.Predict();
       }
     }
 
     // measure speed
-    auto time3 = time();
+    auto time5 = time();
     for (int i = 0; i < 50; i++) {
       if (is_lod) {
-        auto out = paddle_mobile.Predict(input_tensor);
+        auto out = paddle_mobile.Predict(input_lod_tensor);
       } else {
-        auto out = paddle_mobile.Predict(input_data, dims);
+        paddle_mobile.Feed(var_names[0], input_tensor);
+        paddle_mobile.Predict();
       }
     }
-    auto time4 = time();
+    auto time6 = time();
     std::cout << "auto-test"
-              << " predict-time-cost " << time_diff(time3, time4) / 50 << "ms"
+              << " predict-time-cost " << time_diff(time5, time6) / 50 << "ms"
               << std::endl;
 
     // check correctness
     if (is_lod) {
-      auto out = paddle_mobile.Predict(input_tensor);
+      auto out = paddle_mobile.Predict(input_lod_tensor);
     } else {
-      auto out = paddle_mobile.Predict(input_data, dims);
+      paddle_mobile.Feed(var_names[0], input_tensor);
+      paddle_mobile.Predict();
     }
     for (auto var_name : var_names) {
       auto out = paddle_mobile.Fetch(var_name);
diff --git a/tools/python/fluidtools/run.py b/tools/python/fluidtools/run.py
index 5ded7d2d25..291a7729a8 100644
--- a/tools/python/fluidtools/run.py
+++ b/tools/python/fluidtools/run.py
@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt):
             pp_green("load time cost : {}".format(parts[2]), 1)
         elif parts[1] == "predict-time-cost":
             pp_green("predict time cost : {}".format(parts[2]), 1)
+        elif parts[1] == "preprocess-time-cost":
+            pp_green("preprocess time cost : {}".format(parts[2]), 1)
         elif parts[1] == "var":
             var_name = parts[2]
             values = list(map(lambda x: float(x), parts[3:]))
-- 
GitLab
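
Usage note (not part of the patch): the sketch below shows how the two features added here fit together, namely the MemoryOptimizationLevel config field and the raw-pointer Tensor constructor. It mirrors the Feed/Predict/Fetch sequence from the updated test/net/test_net.cpp; the model paths, input shape, and feed/fetch variable names are hypothetical placeholders, and the header paths assume the paddle-mobile source layout.

#include <vector>

#include "common/types.h"
#include "framework/tensor.h"
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobileConfigInternal config;
  // Reuse memory for intermediate tensors but leave feed variables alone,
  // so a caller-owned input buffer is not reused between Feed() and Predict().
  config.memory_optimization_level =
      paddle_mobile::MemoryOptimizationWithoutFeeds;

  paddle_mobile::PaddleMobile<paddle_mobile::CPU, float> paddle_mobile(config);
  paddle_mobile.SetThreadNum(1);
  // Hypothetical model location; the trailing arguments follow the order
  // used in test_net.cpp (optimize, quantification, batch size, lod mode).
  if (!paddle_mobile.Load("./model-dir/model", "./model-dir/params", true,
                          false, 1, true)) {
    return 1;
  }

  // Wrap an existing buffer with the new constructor: no copy is made and
  // the tensor's holder points directly at this memory, so the buffer must
  // stay alive for as long as the tensor is used.
  std::vector<int64_t> dims = {1, 3, 224, 224};
  std::vector<float> input(1 * 3 * 224 * 224, 0.5f);
  paddle_mobile::framework::Tensor input_tensor(
      input.data(), paddle_mobile::framework::make_ddim(dims));

  paddle_mobile.Feed("image", input_tensor);   // placeholder feed var name
  paddle_mobile.Predict();
  auto output = paddle_mobile.Fetch("fetch");  // placeholder fetch var name
  return 0;
}

One caveat visible in the diff itself: the new PlaceholderImpl overload stores the caller's pointer in the same PODDeleter-managed smart pointer used for internally allocated buffers, so the holder will eventually try to release that pointer. The updated test hands in a plain stack array the same way, but callers should be deliberate about who owns the buffer and when it is freed.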