提交 84409bd8 编写于 作者: Yanzhan Yang 提交者: GitHub

support creating tensor with raw pointer. (#1714)

* support creating tensor with raw pointer.

* fix style

* fix fpga compilation error
上级 2292f6ef
......@@ -132,9 +132,15 @@ enum PowerMode {
AUTO = 4, // scheduled by system
};
// Granularity of the variable-memory reuse pass (see MemoryOptPass);
// the executor skips the pass entirely when the level is NoMemoryOptimization.
enum MemoryOptimizationLevel {
NoMemoryOptimization = 0,  // do not run the memory-reuse pass
MemoryOptimizationWithoutFeeds = 1,  // reuse buffers, but exclude feed-op variables from reuse
FullMemoryOptimization = 2,  // reuse buffers of all non-persistable variables
};
// Internal configuration knobs passed to the PaddleMobile engine.
struct PaddleMobileConfigInternal {
// Presumably defers model loading until the first Predict call — confirm
// against the loader before relying on it.
bool load_when_predict = false;
// NOTE(review): this bool appears superseded by memory_optimization_level
// below (the executor gates the pass only on the level, not on this flag) —
// confirm whether it is still read anywhere before removing it.
bool enable_memory_optimization = true;
MemoryOptimizationLevel memory_optimization_level = FullMemoryOptimization;
};
extern const char *G_OP_TYPE_CONV;
......
......@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
"program_desc_ should not be nullptr");
#if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \
!defined(PADDLE_MOBILE_CL)
if (config_.enable_memory_optimization) {
pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
if (config_.memory_optimization_level != NoMemoryOptimization) {
pass::MemoryOptPass()(program_desc_.get(), program_.scope.get(),
config_.memory_optimization_level);
}
#endif
// resize feed and fetch list
......
......@@ -57,6 +57,20 @@ class Tensor : public TensorBase {
}
}
template <typename T>
Tensor(T *input, DDim ddim) {
// PADDLE_MOBILE_ENFORCE(
// (sizeof(input) / sizeof(input[0])) == framework::product(ddim),
// "input vector'length should be equal to tensor's length");
Resize(ddim);
auto type = type_id<T>().hash_code();
int64_t size = numel() * SizeOfType(type);
holder_.reset(new PlaceholderImpl(size, type, (uint8_t *)input));
holder_->set_type(type);
offset_ = 0;
}
Tensor(const Tensor &inTensor) {
this->dims_ = inTensor.dims_;
this->holder_ = inTensor.holder_;
......@@ -203,6 +217,15 @@ class Tensor : public TensorBase {
"Insufficient memory to allocation");
}
// Placeholder that adopts an externally supplied buffer instead of
// allocating its own storage; `size` is both the logical size and the
// capacity, `type` is the element type hash.
// NOTE(review): `ptr` is wrapped in memory::PODDeleter, so it is freed when
// the last holder reference dies — the caller must not free it and it must
// have been allocated by the matching allocator; confirm.
PlaceholderImpl(size_t size, const kTypeId_t type, uint8_t *ptr)
: ptr_(ptr, memory::PODDeleter<uint8_t>()),
size_(size),
capatity_(size),
type_(type) {
// Rejects a null input buffer; the message is reused verbatim from the
// allocating constructor even though no allocation happens here.
PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
"Insufficient memory to allocation");
}
virtual size_t size() const { return size_; }
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "pass/memory_optimize.h"
#include "framework/lod_tensor.h"
#include <algorithm>
namespace paddle_mobile {
namespace pass {
......@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) {
return var;
}
void MemoryOptPass::operator()(const framework::ProgramDesc *program,
framework::Scope *scope) {
void MemoryOptPass::operator()(
const framework::ProgramDesc *program, framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level) {
const auto &blocks = program->Blocks();
for (const auto &block : blocks) {
// access all variables in each block
......@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
std::stack<VarNode *> empty_var_nodes;
analysis_nodes_.swap(empty_var_nodes);
std::vector<std::string> exclude_var_names;
for (const auto &op : block->Ops()) {
for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) {
if (!IsPersistable(input)) {
if (memory_optimization_level == MemoryOptimizationWithoutFeeds) {
if (op->Type() == "feed") {
exclude_var_names.push_back(input);
}
}
}
}
}
}
std::vector<VarNode *> fetch_var_nodes;
for (const auto &op : block->Ops()) {
DLOG << "op_desc->Type(): " << op->Type();
for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) {
if (!IsPersistable(output)) {
if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output;
VarNode *node = CreateNode(output);
analysis_nodes_.push(node);
......@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
}
for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) {
if (!IsPersistable(input)) {
if (!IsPersistable(input) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
input) == exclude_var_names.end()) {
DLOG << "input: " << input;
VarNode *node = CreateNode(input);
analysis_nodes_.push(node);
......@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
}
for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) {
if (!IsPersistable(output)) {
if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output;
VarNode *node = CreateNode(output);
analysis_nodes_.push(node);
......
......@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase {
}
void operator()(const framework::ProgramDesc *program,
framework::Scope *scope);
framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level);
void AppendBlockVars(const framework::BlockDesc *block);
......
......@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) {
bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
arg_index++;
paddle_mobile::PaddleMobileConfigInternal config;
config.enable_memory_optimization = enable_memory_optimization;
config.memory_optimization_level = enable_memory_optimization
? MemoryOptimizationWithoutFeeds
: NoMemoryOptimization;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
paddle_mobile.SetThreadNum(1);
......@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) {
fuse, false, 1, true)) {
auto time2 = time();
std::cout << "auto-test"
<< " load-time-cost :" << time_diff(time1, time1) << "ms"
<< " load-time-cost :" << time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input_data;
float input_data_array[size];
std::ifstream in("input.txt", std::ios::in);
for (int i = 0; i < size; i++) {
float num;
in >> num;
input_data.push_back(num);
input_data_array[i] = num;
}
in.close();
paddle_mobile::framework::LoDTensor input_tensor;
auto time3 = time();
// std::vector<float> input_data;
// for (int i = 0; i < size; i++) {
// float num = input_data_array[i];
// input_data.push_back(num);
// }
// paddle_mobile::framework::Tensor input_tensor(input_data,
// paddle_mobile::framework::make_ddim(dims));
paddle_mobile::framework::Tensor input_tensor(
input_data_array, paddle_mobile::framework::make_ddim(dims));
auto time4 = time();
std::cout << "auto-test"
<< " preprocess-time-cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
paddle_mobile::framework::LoDTensor input_lod_tensor;
if (is_lod) {
input_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
input_tensor.set_lod(lod);
auto *tensor_data = input_tensor.mutable_data<float>();
input_lod_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
input_lod_tensor.set_lod(lod);
auto *tensor_data = input_lod_tensor.mutable_data<float>();
for (int i = 0; i < size; i++) {
tensor_data[i] = input_data[i];
tensor_data[i] = input_data_array[i];
}
}
// 预热10次
for (int i = 0; i < 10; i++) {
if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor);
auto out = paddle_mobile.Predict(input_lod_tensor);
} else {
auto out = paddle_mobile.Predict(input_data, dims);
paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
}
}
// 测速
auto time3 = time();
auto time5 = time();
for (int i = 0; i < 50; i++) {
if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor);
auto out = paddle_mobile.Predict(input_lod_tensor);
} else {
auto out = paddle_mobile.Predict(input_data, dims);
paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
}
}
auto time4 = time();
auto time6 = time();
std::cout << "auto-test"
<< " predict-time-cost " << time_diff(time3, time4) / 50 << "ms"
<< " predict-time-cost " << time_diff(time5, time6) / 50 << "ms"
<< std::endl;
// 测试正确性
if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor);
auto out = paddle_mobile.Predict(input_lod_tensor);
} else {
auto out = paddle_mobile.Predict(input_data, dims);
paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
}
for (auto var_name : var_names) {
auto out = paddle_mobile.Fetch(var_name);
......
......@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt):
pp_green("load time cost : {}".format(parts[2]), 1)
elif parts[1] == "predict-time-cost":
pp_green("predict time cost : {}".format(parts[2]), 1)
elif parts[1] == "preprocess-time-cost":
pp_green("preprocess time cost : {}".format(parts[2]), 1)
elif parts[1] == "var":
var_name = parts[2]
values = list(map(lambda x: float(x), parts[3:]))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册