提交 84409bd8 编写于 作者: Y Yanzhan Yang 提交者: GitHub

support creating tensor with raw pointer. (#1714)

* support creating tensor with raw pointer.

* fix style

* fix fpga compilation error
上级 2292f6ef
...@@ -132,9 +132,15 @@ enum PowerMode { ...@@ -132,9 +132,15 @@ enum PowerMode {
AUTO = 4, // scheduled by system AUTO = 4, // scheduled by system
}; };
// Controls how aggressively the memory-optimization pass
// (pass::MemoryOptPass) reuses variable buffers at inference time.
enum MemoryOptimizationLevel {
  // Pass disabled: every variable keeps its own buffer.
  NoMemoryOptimization = 0,
  // Reuse buffers, but exclude the inputs of "feed" ops so that
  // externally supplied input tensors are never overwritten.
  MemoryOptimizationWithoutFeeds = 1,
  // Reuse buffers for all non-persistable variables.
  FullMemoryOptimization = 2,
};
struct PaddleMobileConfigInternal { struct PaddleMobileConfigInternal {
bool load_when_predict = false; bool load_when_predict = false;
bool enable_memory_optimization = true; MemoryOptimizationLevel memory_optimization_level = FullMemoryOptimization;
}; };
extern const char *G_OP_TYPE_CONV; extern const char *G_OP_TYPE_CONV;
......
...@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program, ...@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
"program_desc_ should not be nullptr"); "program_desc_ should not be nullptr");
#if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \ #if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \
!defined(PADDLE_MOBILE_CL) !defined(PADDLE_MOBILE_CL)
if (config_.enable_memory_optimization) { if (config_.memory_optimization_level != NoMemoryOptimization) {
pass::MemoryOptPass()(program_desc_.get(), program_.scope.get()); pass::MemoryOptPass()(program_desc_.get(), program_.scope.get(),
config_.memory_optimization_level);
} }
#endif #endif
// resize feed and fetch list // resize feed and fetch list
......
...@@ -57,6 +57,20 @@ class Tensor : public TensorBase { ...@@ -57,6 +57,20 @@ class Tensor : public TensorBase {
} }
} }
// Constructs a Tensor that adopts an existing raw element buffer
// instead of allocating fresh storage.
//
// @param input pointer to a buffer holding framework::product(ddim)
//              elements of type T; must not be null.
// @param ddim  the tensor's shape.
//
// NOTE(review): the holder wraps `input` with memory::PODDeleter (see
// PlaceholderImpl), so the tensor takes OWNERSHIP and will free the
// buffer when the last holder reference dies — callers must not free
// it themselves nor pass stack/static memory. TODO: confirm this
// contract with all call sites.
//
// The buffer length cannot be validated from a raw pointer (the old
// commented-out check `sizeof(input) / sizeof(input[0])` measured the
// pointer, not the array); the caller is trusted to size it correctly.
template <typename T>
Tensor(T *input, DDim ddim) {
  Resize(ddim);
  auto type = type_id<T>().hash_code();
  int64_t size = numel() * SizeOfType(type);
  // reinterpret_cast (not a C-style cast) for the byte-level view of
  // the caller-provided element buffer.
  holder_.reset(
      new PlaceholderImpl(size, type, reinterpret_cast<uint8_t *>(input)));
  holder_->set_type(type);
  offset_ = 0;
}
Tensor(const Tensor &inTensor) { Tensor(const Tensor &inTensor) {
this->dims_ = inTensor.dims_; this->dims_ = inTensor.dims_;
this->holder_ = inTensor.holder_; this->holder_ = inTensor.holder_;
...@@ -203,6 +217,15 @@ class Tensor : public TensorBase { ...@@ -203,6 +217,15 @@ class Tensor : public TensorBase {
"Insufficient memory to allocation"); "Insufficient memory to allocation");
} }
// Wraps an externally allocated buffer as this tensor's storage.
// `ptr` is ADOPTED: ptr_ is constructed with memory::PODDeleter, so
// the buffer is freed when this placeholder is destroyed — the caller
// must not free it. `size` is taken as both the logical size and the
// capacity of the buffer.
// NOTE(review): `capatity_` is a pre-existing member-name typo
// ("capacity") kept for consistency with the rest of the class.
PlaceholderImpl(size_t size, const kTypeId_t type, uint8_t *ptr)
    : ptr_(ptr, memory::PODDeleter<uint8_t>()),
      size_(size),
      capatity_(size),
      type_(type) {
  // Rejects a null buffer up front rather than failing on first use.
  PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
                        "Insufficient memory to allocation");
}
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); } virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "pass/memory_optimize.h" #include "pass/memory_optimize.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include <algorithm>
namespace paddle_mobile { namespace paddle_mobile {
namespace pass { namespace pass {
...@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) { ...@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) {
return var; return var;
} }
void MemoryOptPass::operator()(const framework::ProgramDesc *program, void MemoryOptPass::operator()(
framework::Scope *scope) { const framework::ProgramDesc *program, framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level) {
const auto &blocks = program->Blocks(); const auto &blocks = program->Blocks();
for (const auto &block : blocks) { for (const auto &block : blocks) {
// access all variables in each block // access all variables in each block
...@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
std::stack<VarNode *> empty_var_nodes; std::stack<VarNode *> empty_var_nodes;
analysis_nodes_.swap(empty_var_nodes); analysis_nodes_.swap(empty_var_nodes);
std::vector<std::string> exclude_var_names;
for (const auto &op : block->Ops()) {
for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) {
if (!IsPersistable(input)) {
if (memory_optimization_level == MemoryOptimizationWithoutFeeds) {
if (op->Type() == "feed") {
exclude_var_names.push_back(input);
}
}
}
}
}
}
std::vector<VarNode *> fetch_var_nodes; std::vector<VarNode *> fetch_var_nodes;
for (const auto &op : block->Ops()) { for (const auto &op : block->Ops()) {
DLOG << "op_desc->Type(): " << op->Type(); DLOG << "op_desc->Type(): " << op->Type();
for (const auto &outputs : op->GetOutputs()) { for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) { for (const auto &output : outputs.second) {
if (!IsPersistable(output)) { if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output; DLOG << "output: " << output;
VarNode *node = CreateNode(output); VarNode *node = CreateNode(output);
analysis_nodes_.push(node); analysis_nodes_.push(node);
...@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
} }
for (const auto &inputs : op->GetInputs()) { for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) { for (const auto &input : inputs.second) {
if (!IsPersistable(input)) { if (!IsPersistable(input) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
input) == exclude_var_names.end()) {
DLOG << "input: " << input; DLOG << "input: " << input;
VarNode *node = CreateNode(input); VarNode *node = CreateNode(input);
analysis_nodes_.push(node); analysis_nodes_.push(node);
...@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
} }
for (const auto &outputs : op->GetOutputs()) { for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) { for (const auto &output : outputs.second) {
if (!IsPersistable(output)) { if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output; DLOG << "output: " << output;
VarNode *node = CreateNode(output); VarNode *node = CreateNode(output);
analysis_nodes_.push(node); analysis_nodes_.push(node);
......
...@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase { ...@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase {
} }
void operator()(const framework::ProgramDesc *program, void operator()(const framework::ProgramDesc *program,
framework::Scope *scope); framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level);
void AppendBlockVars(const framework::BlockDesc *block); void AppendBlockVars(const framework::BlockDesc *block);
......
...@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) { ...@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) {
bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1; bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
arg_index++; arg_index++;
paddle_mobile::PaddleMobileConfigInternal config; paddle_mobile::PaddleMobileConfigInternal config;
config.enable_memory_optimization = enable_memory_optimization; config.memory_optimization_level = enable_memory_optimization
? MemoryOptimizationWithoutFeeds
: NoMemoryOptimization;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config); paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
paddle_mobile.SetThreadNum(1); paddle_mobile.SetThreadNum(1);
...@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) { ...@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) {
fuse, false, 1, true)) { fuse, false, 1, true)) {
auto time2 = time(); auto time2 = time();
std::cout << "auto-test" std::cout << "auto-test"
<< " load-time-cost :" << time_diff(time1, time1) << "ms" << " load-time-cost :" << time_diff(time1, time2) << "ms"
<< std::endl; << std::endl;
std::vector<float> input_data; float input_data_array[size];
std::ifstream in("input.txt", std::ios::in); std::ifstream in("input.txt", std::ios::in);
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
float num; float num;
in >> num; in >> num;
input_data.push_back(num); input_data_array[i] = num;
} }
in.close(); in.close();
paddle_mobile::framework::LoDTensor input_tensor; auto time3 = time();
// std::vector<float> input_data;
// for (int i = 0; i < size; i++) {
// float num = input_data_array[i];
// input_data.push_back(num);
// }
// paddle_mobile::framework::Tensor input_tensor(input_data,
// paddle_mobile::framework::make_ddim(dims));
paddle_mobile::framework::Tensor input_tensor(
input_data_array, paddle_mobile::framework::make_ddim(dims));
auto time4 = time();
std::cout << "auto-test"
<< " preprocess-time-cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
paddle_mobile::framework::LoDTensor input_lod_tensor;
if (is_lod) { if (is_lod) {
input_tensor.Resize(paddle_mobile::framework::make_ddim(dims)); input_lod_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
input_tensor.set_lod(lod); input_lod_tensor.set_lod(lod);
auto *tensor_data = input_tensor.mutable_data<float>(); auto *tensor_data = input_lod_tensor.mutable_data<float>();
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
tensor_data[i] = input_data[i]; tensor_data[i] = input_data_array[i];
} }
} }
// 预热10次 // 预热10次
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
} }
// 测速 // 测速
auto time3 = time(); auto time5 = time();
for (int i = 0; i < 50; i++) { for (int i = 0; i < 50; i++) {
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
} }
auto time4 = time(); auto time6 = time();
std::cout << "auto-test" std::cout << "auto-test"
<< " predict-time-cost " << time_diff(time3, time4) / 50 << "ms" << " predict-time-cost " << time_diff(time5, time6) / 50 << "ms"
<< std::endl; << std::endl;
// 测试正确性 // 测试正确性
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
for (auto var_name : var_names) { for (auto var_name : var_names) {
auto out = paddle_mobile.Fetch(var_name); auto out = paddle_mobile.Fetch(var_name);
......
...@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt): ...@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt):
pp_green("load time cost : {}".format(parts[2]), 1) pp_green("load time cost : {}".format(parts[2]), 1)
elif parts[1] == "predict-time-cost": elif parts[1] == "predict-time-cost":
pp_green("predict time cost : {}".format(parts[2]), 1) pp_green("predict time cost : {}".format(parts[2]), 1)
elif parts[1] == "preprocess-time-cost":
pp_green("preprocess time cost : {}".format(parts[2]), 1)
elif parts[1] == "var": elif parts[1] == "var":
var_name = parts[2] var_name = parts[2]
values = list(map(lambda x: float(x), parts[3:])) values = list(map(lambda x: float(x), parts[3:]))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册