Commit 4794d9cf authored by Xin Pan

use fast RunPreparedContext for inference

Parent 515a756a
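This commit switches NativePaddlePredictor from the map-based overload of Executor::RunPreparedContext (which takes feed_targets/fetch_targets maps) to the lighter overload that only takes the prepared context and the scope. Feed and fetch ops are resolved once in PrepareFeedFetch(), and per-run data moves through framework::SetFeedVariable / framework::GetFetchVariable. Below is a minimal sketch of that flow, assuming the executor, prepared context and scope were already created as in NativePaddlePredictor::Init(); the function name RunOnce and the fixed column index 0 are illustrative only.

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {

// Sketch only: one inference pass through the fast RunPreparedContext path.
framework::LoDTensor RunOnce(framework::Executor *executor,
                             framework::ExecutorPrepareContext *ctx,
                             framework::Scope *scope,
                             const framework::LoDTensor &input) {
  // Write the input straight into the "feed" holder variable at column 0.
  framework::SetFeedVariable(scope, input, "feed", 0);

  // Variables were created once during Init(), so neither a local scope nor
  // new variables are needed on each run.
  executor->RunPreparedContext(ctx, scope,
                               false /* create_local_scope */,
                               false /* create_vars */);

  // Read the result back from the "fetch" holder variable at column 0.
  return framework::GetFetchVariable(*scope, "fetch", 0);
}

}  // namespace paddle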
......@@ -21,6 +21,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -57,6 +58,27 @@ std::string num2str(T a) {
}
} // namespace
void NativePaddlePredictor::PrepareFeedFetch() {
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
int idx = boost::get<int>(op->GetAttr("col"));
if (feeds_.size() <= idx) {
feeds_.resize(idx + 1);
}
feeds_[idx] = op;
feed_names_[op->Output("Out")[0]] = idx;
LOG(ERROR) << "feed " << idx << " " << op->Output("Out")[0];
} else if (op->Type() == "fetch") {
int idx = boost::get<int>(op->GetAttr("col"));
if (fetchs_.size() <= idx) {
fetchs_.resize(idx + 1);
}
fetchs_[idx] = op;
LOG(ERROR) << "fetch " << idx << " " << op->Input("X")[0];
}
}
}
bool NativePaddlePredictor::Init(
std::shared_ptr<framework::Scope> parent_scope) {
VLOG(3) << "Predictor::init()";
......@@ -108,8 +130,7 @@ bool NativePaddlePredictor::Init(
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
fetch_target_names_ = inference_program_->GetFetchTargetNames();
PrepareFeedFetch();
return true;
}
......@@ -130,36 +151,21 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
Timer timer;
timer.tic();
// set feed variable
std::map<std::string, const framework::LoDTensor *> feed_targets;
std::vector<framework::LoDTensor> feeds;
if (!SetFeed(inputs, &feeds)) {
framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
}
for (size_t i = 0; i < feed_target_names_.size(); ++i) {
if (config_.specify_input_name) {
feed_targets[inputs[i].name] = &feeds[i];
} else {
feed_targets[feed_target_names_[i]] = &feeds[i];
}
}
// get fetch variable
std::map<std::string, framework::LoDTensor *> fetch_targets;
std::vector<framework::LoDTensor> fetchs;
fetchs.resize(fetch_target_names_.size());
for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
fetch_targets[fetch_target_names_[i]] = &fetchs[i];
}
// Run the inference program
  // If variables are shared, we need not create them each time
VLOG(4) << "Run prepared context";
executor_->RunPreparedContext(
ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
&feed_targets, &fetch_targets,
false, /* don't create local scope each time*/
      false /* don't create variable each time */);
executor_->RunPreparedContext(ctx_.get(), scope,
false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
VLOG(4) << "Finish prepared context";
if (!GetFetch(fetchs, output_data)) {
// get fetch variable
if (!GetFetch(output_data, scope)) {
LOG(ERROR) << "fail to get fetches";
return false;
}
......@@ -180,13 +186,13 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
}
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
std::vector<framework::LoDTensor> *feeds) {
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feed_target_names_.size()) {
if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size.";
return false;
}
for (size_t i = 0; i < feed_target_names_.size(); ++i) {
for (size_t i = 0; i < inputs.size(); ++i) {
framework::LoDTensor input;
framework::DDim ddim = framework::make_ddim(inputs[i].shape);
void *input_ptr;
......@@ -208,29 +214,40 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
lod.emplace_back(level);
}
input.set_lod(lod);
feeds->push_back(input);
int idx = -1;
if (config_.specify_input_name) {
idx =
boost::get<int>(feeds_[feed_names_[inputs[i].name]]->GetAttr("col"));
} else {
idx = boost::get<int>(feeds_[i]->GetAttr("col"));
}
framework::SetFeedVariable(scope, input, "feed", idx);
}
return true;
}
bool NativePaddlePredictor::GetFetch(
const std::vector<framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *outputs) {
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetchs.size());
for (size_t i = 0; i < fetchs.size(); ++i) {
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
std::string fetch_target_name = fetchs_[i]->Input("X")[0];
int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE(idx == i);
framework::LoDTensor &output =
framework::GetFetchVariable(*scope, "fetch", idx);
// TODO(panyx0718): Support fetch of other types.
if (fetchs[i].type() != typeid(float)) {
if (output.type() != typeid(float)) {
LOG(ERROR) << "only support fetching float now.";
return false;
}
std::vector<int> shape;
auto dims_i = fetchs[i].dims();
auto lod = fetchs[i].lod();
const float *output_ptr = fetchs[i].data<float>();
auto dims_i = output.dims();
auto lod = output.lod();
const float *output_ptr = output.data<float>();
// const int64_t* output_ptr = fetchs[i].data<int64_t>();
auto num = fetchs[i].numel();
auto num = output.numel();
std::vector<float> data;
if (0 == lod.size()) {
std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
......@@ -275,7 +292,7 @@ bool NativePaddlePredictor::GetFetch(
}
std::memcpy(buffer.data(), data.data(), buffer.length());
// copy LoD
for (const auto &level : fetchs[i].lod()) {
for (const auto &level : output.lod()) {
outputs->at(i).lod.emplace_back(level);
}
outputs->at(i).dtype = PaddleDType::FLOAT32;
......
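For reference, the "feed" and "fetch" holder variables written and read above each hold a framework::FeedFetchList (a std::vector of LoDTensor), and the col attribute of every feed/fetch op is the index into that list. The following is a simplified sketch of the lookup that framework::GetFetchVariable performs; the real implementation lives in feed_fetch_method.cc and does additional validation, and the name FetchAt is illustrative.

#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"

namespace paddle {

// Sketch: how the fetch result at column `idx` is looked up from the scope.
const framework::LoDTensor &FetchAt(const framework::Scope &scope, size_t idx) {
  auto *var = scope.FindVar("fetch");                          // fetch holder var
  auto &list = *var->GetMutable<framework::FeedFetchList>();   // vector<LoDTensor>
  return list.at(idx);                                         // `col` picks the slot
}

}  // namespace paddle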
......@@ -15,6 +15,7 @@
#pragma once
#include <glog/logging.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -47,9 +48,11 @@ class NativePaddlePredictor : public PaddlePredictor {
protected:
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
std::vector<framework::LoDTensor> *feeds);
bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *output_data);
framework::Scope *scope);
bool GetFetch(std::vector<PaddleTensor> *output_data,
framework::Scope *scope);
void PrepareFeedFetch();
NativeConfig config_;
platform::Place place_;
......@@ -57,8 +60,9 @@ class NativePaddlePredictor : public PaddlePredictor {
std::shared_ptr<framework::Scope> scope_;
std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
std::unique_ptr<framework::ProgramDesc> inference_program_;
std::vector<std::string> feed_target_names_;
std::vector<std::string> fetch_target_names_;
std::vector<framework::OpDesc *> feeds_;
std::map<std::string, size_t> feed_names_;
std::vector<framework::OpDesc *> fetchs_;
// Do not use unique_ptr, use parent scope to delete
framework::Scope *sub_scope_{nullptr};
};
......
......@@ -74,10 +74,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
VLOG(5) << "to create variables";
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
fetch_target_names_ = inference_program_->GetFetchTargetNames();
PrepareFeedFetch();
return true;
}
......
......@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
DEFINE_string(model_path, "", "Directory of the inference model.");
DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times");
......@@ -124,14 +126,35 @@ void ThreadRunInfer(
std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
// map the data of feed_targets to feed_holder
for (auto* op : inference_program->Block(0).AllOps()) {
if (op->Type() == "feed") {
std::string feed_target_name = op->Output("Out")[0];
int idx = boost::get<int>(op->GetAttr("col"));
paddle::framework::SetFeedVariable(scope, *feed_targets[feed_target_name],
"feed", idx);
}
}
auto& inputs = jobs[tid];
auto start_ms = GetCurrentMs();
for (size_t i = 0; i < inputs.size(); ++i) {
feed_targets[feed_target_names[0]] = inputs[i];
executor.RunPreparedContext(ctx.get(), &sub_scope, &feed_targets,
&fetch_targets, false /*create_local_scope*/);
executor.RunPreparedContext(ctx.get(), &sub_scope,
false /*create_local_scope*/);
}
auto stop_ms = GetCurrentMs();
// obtain the data of fetch_targets from fetch_holder
for (auto* op : inference_program->Block(0).AllOps()) {
if (op->Type() == "fetch") {
std::string fetch_target_name = op->Input("X")[0];
int idx = boost::get<int>(op->GetAttr("col"));
*fetch_targets[fetch_target_name] =
paddle::framework::GetFetchVariable(*scope, "fetch", idx);
}
}
scope->DeleteScope(&sub_scope);
LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
<< " samples, avg time per sample: "
......
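None of the changes above alter the public predictor API; callers still build a NativeConfig and call Run(), and the rewiring to the fast RunPreparedContext path stays internal. A hypothetical caller-side sketch follows; the model path, input name and shape are made up for illustration.

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "/path/to/model";  // placeholder, not a real model
  config.use_gpu = false;

  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  std::vector<float> input_data(1 * 1 * 28 * 28, 0.f);  // made-up shape
  paddle::PaddleTensor input;
  input.name = "x";  // only consulted when config.specify_input_name is true
  input.shape = {1, 1, 28, 28};
  input.data =
      paddle::PaddleBuf(input_data.data(), input_data.size() * sizeof(float));
  input.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  // Internally this runs SetFeed -> fast RunPreparedContext -> GetFetch.
  predictor->Run({input}, &outputs);
  return 0;
}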