Unverified · Commit 823c4f87, authored by Xin Pan, committed by GitHub

Merge pull request #13058 from panyx0718/infer

use fast RunPreparedContext for inference
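A sketch, not part of the commit: the change moves NativePaddlePredictor and its subclasses off the Run/RunPreparedContext overloads that take feed_targets/fetch_targets maps and onto the plain RunPreparedContext overload. Feed tensors are bound to the scope with framework::SetFeedVariable before the call, and results are read back with framework::GetFetchVariable afterwards. Pieced together from the diff below, the fast call pattern looks roughly like this (input_tensor and idx are placeholders; executor_, ctx_, sub_scope_, and scope_ are the predictor members shown later):

// Sketch only (not part of the commit); error handling omitted.
framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();

// Bind each input to slot `idx` of the scope's "feed" holder variable.
framework::SetFeedVariable(scope, input_tensor, "feed", idx);

// Fast overload: no feed_targets/fetch_targets maps resolved per call.
executor_->RunPreparedContext(ctx_.get(), scope,
                              false /* don't create local scope each time */,
                              false /* don't create variables each time */);

// Read slot `idx` of the scope's "fetch" holder variable back out.
framework::LoDTensor &output = framework::GetFetchVariable(*scope, "fetch", idx);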
@@ -60,6 +60,7 @@ class Executor {
   void Run(const ProgramDesc& prog, Scope* scope, int block_id,
            bool create_local_scope = true, bool create_vars = true);
 
+  // This API is very slow.
   void Run(const ProgramDesc& program, Scope* scope,
            std::map<std::string, const LoDTensor*>* feed_targets,
            std::map<std::string, LoDTensor*>* fetch_targets,
@@ -79,6 +80,7 @@ class Executor {
                           bool create_local_scope = true,
                           bool create_vars = true, bool keep_kids = false);
 
+  // This API is very slow.
   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           std::map<std::string, const LoDTensor*>* feed_targets,
                           std::map<std::string, LoDTensor*>* fetch_targets,
...
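The added comments flag the map-taking overloads as slow: roughly, each call has to walk the program's feed/fetch ops, match them against the feed_targets/fetch_targets maps by name, and copy the targets in and out, work the fast overload skips because the feed/fetch binding is done once up front. A hedged sketch of the slow call form, assuming ctx, scope, and the two maps are already prepared (trailing defaulted arguments omitted):

// Sketch only: the "very slow" overload re-resolves feed/fetch targets per call.
std::map<std::string, const framework::LoDTensor *> feed_targets;  // name -> input
std::map<std::string, framework::LoDTensor *> fetch_targets;       // name -> output
// ... fill both maps for every input/output tensor, on every call ...
executor.RunPreparedContext(ctx.get(), scope, &feed_targets, &fetch_targets);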
@@ -80,8 +80,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
                                sub_scope_ ? sub_scope_ : scope_.get(), 0);
   // Get the feed_target_names and fetch_target_names
-  feed_target_names_ = inference_program_->GetFeedTargetNames();
-  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  PrepareFeedFetch();
   return true;
 }
...
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 
+#include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -57,6 +58,25 @@ std::string num2str(T a) {
 }
 }  // namespace
 
+void NativePaddlePredictor::PrepareFeedFetch() {
+  for (auto *op : inference_program_->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (feeds_.size() <= idx) {
+        feeds_.resize(idx + 1);
+      }
+      feeds_[idx] = op;
+      feed_names_[op->Output("Out")[0]] = idx;
+    } else if (op->Type() == "fetch") {
+      int idx = boost::get<int>(op->GetAttr("col"));
+      if (fetchs_.size() <= idx) {
+        fetchs_.resize(idx + 1);
+      }
+      fetchs_[idx] = op;
+    }
+  }
+}
+
 bool NativePaddlePredictor::Init(
     std::shared_ptr<framework::Scope> parent_scope) {
   VLOG(3) << "Predictor::init()";
@@ -108,8 +128,7 @@ bool NativePaddlePredictor::Init(
                                sub_scope_ ? sub_scope_ : scope_.get(), 0);
   // Get the feed_target_names and fetch_target_names
-  feed_target_names_ = inference_program_->GetFeedTargetNames();
-  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  PrepareFeedFetch();
   return true;
 }
@@ -130,36 +149,21 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   Timer timer;
   timer.tic();
   // set feed variable
-  std::map<std::string, const framework::LoDTensor *> feed_targets;
   std::vector<framework::LoDTensor> feeds;
-  if (!SetFeed(inputs, &feeds)) {
+  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
+  if (!SetFeed(inputs, scope)) {
     LOG(ERROR) << "fail to set feed";
     return false;
   }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
-    if (config_.specify_input_name) {
-      feed_targets[inputs[i].name] = &feeds[i];
-    } else {
-      feed_targets[feed_target_names_[i]] = &feeds[i];
-    }
-  }
-  // get fetch variable
-  std::map<std::string, framework::LoDTensor *> fetch_targets;
-  std::vector<framework::LoDTensor> fetchs;
-  fetchs.resize(fetch_target_names_.size());
-  for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
-    fetch_targets[fetch_target_names_[i]] = &fetchs[i];
-  }
   // Run the inference program
   // if share variables, we need not create variables
   VLOG(4) << "Run prepared context";
-  executor_->RunPreparedContext(
-      ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
-      &feed_targets, &fetch_targets,
-      false, /* don't create local scope each time*/
-      false /* don't create variable eatch time */);
+  executor_->RunPreparedContext(ctx_.get(), scope,
+                                false, /* don't create local scope each time*/
+                                false /* don't create variable eatch time */);
   VLOG(4) << "Finish prepared context";
-  if (!GetFetch(fetchs, output_data)) {
+  // get fetch variable
+  if (!GetFetch(output_data, scope)) {
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
@@ -180,13 +184,13 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
 }
 
 bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
-                                    std::vector<framework::LoDTensor> *feeds) {
+                                    framework::Scope *scope) {
   VLOG(3) << "Predictor::set_feed";
-  if (inputs.size() != feed_target_names_.size()) {
+  if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size.";
     return false;
   }
-  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
     framework::LoDTensor input;
     framework::DDim ddim = framework::make_ddim(inputs[i].shape);
     void *input_ptr;
@@ -208,29 +212,38 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       lod.emplace_back(level);
     }
     input.set_lod(lod);
-    feeds->push_back(input);
+    int idx = -1;
+    if (config_.specify_input_name) {
+      idx = feed_names_[inputs[i].name];
+    } else {
+      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
+    }
+    framework::SetFeedVariable(scope, input, "feed", idx);
   }
   return true;
 }
-bool NativePaddlePredictor::GetFetch(
-    const std::vector<framework::LoDTensor> &fetchs,
-    std::vector<PaddleTensor> *outputs) {
+bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
+                                     framework::Scope *scope) {
   VLOG(3) << "Predictor::get_fetch";
-  outputs->resize(fetchs.size());
-  for (size_t i = 0; i < fetchs.size(); ++i) {
+  outputs->resize(fetchs_.size());
+  for (size_t i = 0; i < fetchs_.size(); ++i) {
+    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
+    PADDLE_ENFORCE(idx == i);
+    framework::LoDTensor &output =
+        framework::GetFetchVariable(*scope, "fetch", idx);
     // TODO(panyx0718): Support fetch of other types.
-    if (fetchs[i].type() != typeid(float)) {
+    if (output.type() != typeid(float)) {
      LOG(ERROR) << "only support fetching float now.";
      return false;
    }
     std::vector<int> shape;
-    auto dims_i = fetchs[i].dims();
-    auto lod = fetchs[i].lod();
-    const float *output_ptr = fetchs[i].data<float>();
+    auto dims_i = output.dims();
+    auto lod = output.lod();
+    const float *output_ptr = output.data<float>();
     // const int64_t* output_ptr = fetchs[i].data<int64_t>();
-    auto num = fetchs[i].numel();
+    auto num = output.numel();
     std::vector<float> data;
     if (0 == lod.size()) {
       std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
@@ -275,7 +288,7 @@ bool NativePaddlePredictor::GetFetch(
     }
     std::memcpy(buffer.data(), data.data(), buffer.length());
     // copy LoD
-    for (const auto &level : fetchs[i].lod()) {
+    for (const auto &level : output.lod()) {
       outputs->at(i).lod.emplace_back(level);
     }
     outputs->at(i).dtype = PaddleDType::FLOAT32;
...
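Background, not part of the commit: PrepareFeedFetch, SetFeed, and GetFetch above all revolve around the scope's single "feed" and "fetch" holder variables, indexed by each feed/fetch op's "col" attribute. SetFeedVariable writes a tensor into slot col of the "feed" holder, and GetFetchVariable reads slot col of the "fetch" holder after the program has run. A minimal, self-contained model in plain C++ with stand-in types (no Paddle dependency), just to illustrate the indexing:

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-ins: a "tensor" is a float vector, a "scope" maps variable names to
// feed/fetch holder lists. Real Paddle uses LoDTensor and framework::Scope.
using Tensor = std::vector<float>;
using FeedFetchList = std::vector<Tensor>;
using Scope = std::unordered_map<std::string, FeedFetchList>;

// Analogous to framework::SetFeedVariable(scope, input, "feed", idx).
void SetFeedVariable(Scope *scope, const Tensor &input,
                     const std::string &var_name, size_t idx) {
  auto &holder = (*scope)[var_name];
  if (holder.size() <= idx) holder.resize(idx + 1);
  holder[idx] = input;  // the feed op with col == idx reads this slot
}

// Analogous to framework::GetFetchVariable(scope, "fetch", idx).
const Tensor &GetFetchVariable(const Scope &scope, const std::string &var_name,
                               size_t idx) {
  return scope.at(var_name).at(idx);  // the fetch op with col == idx wrote it
}

int main() {
  Scope scope;
  SetFeedVariable(&scope, {1.f, 2.f, 3.f}, "feed", 0);
  // ... the executor would run the prepared program here, filling "fetch" ...
  scope["fetch"] = {{42.f}};  // pretend the fetch op produced a result
  assert(GetFetchVariable(scope, "fetch", 0)[0] == 42.f);
  return 0;
}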
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <glog/logging.h>
+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -47,9 +48,11 @@ class NativePaddlePredictor : public PaddlePredictor {
  protected:
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
-               std::vector<framework::LoDTensor> *feeds);
-  bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
-                std::vector<PaddleTensor> *output_data);
+               framework::Scope *scope);
+  bool GetFetch(std::vector<PaddleTensor> *output_data,
+                framework::Scope *scope);
+
+  void PrepareFeedFetch();
 
   NativeConfig config_;
   platform::Place place_;
@@ -57,8 +60,9 @@ class NativePaddlePredictor : public PaddlePredictor {
   std::shared_ptr<framework::Scope> scope_;
   std::unique_ptr<framework::ExecutorPrepareContext> ctx_;
   std::unique_ptr<framework::ProgramDesc> inference_program_;
-  std::vector<std::string> feed_target_names_;
-  std::vector<std::string> fetch_target_names_;
+  std::vector<framework::OpDesc *> feeds_;
+  std::map<std::string, size_t> feed_names_;
+  std::vector<framework::OpDesc *> fetchs_;
   // Do not use unique_ptr, use parent scope to delete
   framework::Scope *sub_scope_{nullptr};
 };
...
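For completeness, a hedged sketch of how an application drives this predictor through the public API declared in paddle_inference_api.h. The model directory, input name, shape, and data are placeholders, and PaddleBuf/PaddleTensor construction details have varied across releases, so treat this as illustrative rather than exact. Internally, Run() now follows the SetFeed → RunPreparedContext (fast overload) → GetFetch sequence shown above.

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "/path/to/model";  // placeholder
  config.use_gpu = false;
  config.specify_input_name = true;     // let SetFeed map inputs by tensor name

  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  std::vector<float> raw_input(1 * 1 * 28 * 28, 0.f);  // placeholder data
  paddle::PaddleTensor input;
  input.name = "x";                                    // placeholder feed name
  input.shape = {1, 1, 28, 28};
  input.dtype = paddle::PaddleDType::FLOAT32;
  input.data =
      paddle::PaddleBuf(raw_input.data(), raw_input.size() * sizeof(float));

  std::vector<paddle::PaddleTensor> outputs;
  // Internally: SetFeed() -> RunPreparedContext(fast overload) -> GetFetch().
  predictor->Run({input}, &outputs);
  return 0;
}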
@@ -74,10 +74,8 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
   VLOG(5) << "to create variables";
   executor_->CreateVariables(*inference_program_,
                              sub_scope_ ? sub_scope_ : scope_.get(), 0);
-
   // Get the feed_target_names and fetch_target_names
-  feed_target_names_ = inference_program_->GetFeedTargetNames();
-  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  PrepareFeedFetch();
   return true;
 }
...
@@ -21,6 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/inference/tests/test_helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 
+#include "paddle/fluid/framework/feed_fetch_method.h"
+
 DEFINE_string(model_path, "", "Directory of the inference model.");
 DEFINE_string(data_file, "", "File of input index data.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
@@ -124,14 +126,35 @@ void ThreadRunInfer(
   std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
   PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL);
 
+  // map the data of feed_targets to feed_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "feed") {
+      std::string feed_target_name = op->Output("Out")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      paddle::framework::SetFeedVariable(scope, *feed_targets[feed_target_name],
+                                         "feed", idx);
+    }
+  }
+
   auto& inputs = jobs[tid];
   auto start_ms = GetCurrentMs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     feed_targets[feed_target_names[0]] = inputs[i];
-    executor.RunPreparedContext(ctx.get(), &sub_scope, &feed_targets,
-                                &fetch_targets, false /*create_local_scope*/);
+    executor.RunPreparedContext(ctx.get(), &sub_scope,
+                                false /*create_local_scope*/);
   }
   auto stop_ms = GetCurrentMs();
 
+  // obtain the data of fetch_targets from fetch_holder
+  for (auto* op : inference_program->Block(0).AllOps()) {
+    if (op->Type() == "fetch") {
+      std::string fetch_target_name = op->Input("X")[0];
+      int idx = boost::get<int>(op->GetAttr("col"));
+      *fetch_targets[fetch_target_name] =
+          paddle::framework::GetFetchVariable(*scope, "fetch", idx);
+    }
+  }
+
   scope->DeleteScope(&sub_scope);
   LOG(INFO) << "Tid: " << tid << ", process " << inputs.size()
             << " samples, avg time per sample: "
...