未验证 提交 18d64025 编写于 作者: Y Yan Chunwei 提交者: GitHub

simplify inference api (#11104)

上级 86d8659c
......@@ -40,14 +40,23 @@ struct PaddleBuf {
// A named tensor passed to or returned from a predictor: a variable name, a
// shape, a data buffer and a dtype tag.
struct PaddleTensor {
std::string name; // variable name.
std::vector<int> shape; // presumably one extent per dimension -- TODO confirm.
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data.
PaddleDType dtype; // presumably the element type of `data` -- TODO confirm.
};
// Identifies the engine used by a predictor. Only kNative is enabled; the
// other kinds are listed as comments until they are supported.
enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
// TODO(Superjomn) support the following engines later.
// kAnakin, // Use Anakin for inference.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};
/*
* A simple inference API for Paddle. Currently this API can be used for
* non-sequence scenarios.
* TODO(Superjomn) Support another API for NLP-related usages.
*/
class PaddlePredictor {
public:
......@@ -69,15 +78,6 @@ class PaddlePredictor {
// Destroy the Predictor.
virtual ~PaddlePredictor() {}
// Engine selector nested in PaddlePredictor. Only kNative is enabled; the
// other kinds are listed as comments until they are supported.
enum class EngineKind {
kNative = -1, // Use the native Fluid facility.
// TODO(Superjomn) support later.
// kAnakin, // Use Anakin for inference.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};
// The common configs for all the predictors.
struct Config {
std::string model_dir; // path to the model directory.
......@@ -86,18 +86,24 @@ class PaddlePredictor {
};
// Config for the native predictor. Extends PaddlePredictor::Config with
// GPU-related fields and the prog_file / param_file strings.
// NOTE(review): `device` and `fraction_of_gpu_memory` are each declared twice
// below (once without and once with a default initializer). This looks like
// the removed and added lines of a diff shown together -- confirm which
// declarations are current before relying on this listing.
struct NativeConfig : public PaddlePredictor::Config {
// GPU related fields.
bool use_gpu{false}; // defaults to false.
int device;
float fraction_of_gpu_memory;
int device{0}; // defaults to 0.
float fraction_of_gpu_memory{-1.f}; // Negative to notify initialization.
std::string prog_file; // presumably the path to the program file -- TODO confirm.
std::string param_file; // presumably the path to the parameter file -- TODO confirm.
bool share_variables; // NOTE(review): no default initializer here -- confirm whether this field is still in use.
};
// A factory to help create difference predictor.
template <
typename ConfigT,
PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
// A factory to help create different predictors.
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type and engine kind. Similar
// configs can be merged, but there shouldn't be a huge config containing
// different fields for more than one kind of predictors.
//
// Similarly, each engine kind should map to a unique predictor implementation.
//
// Template parameters:
//   ConfigT - the config type designating the predictor to create.
//   engine  - the engine kind; defaults to PaddleEngineKind::kNative.
// Returns the created predictor as a std::unique_ptr<PaddlePredictor>.
// Only the declaration is given here; explicit specializations provide the
// definitions.
template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
} // namespace paddle
......@@ -57,8 +57,7 @@ std::string num2str(T a) {
bool NativePaddlePredictor::Init() {
VLOG(3) << "Predictor::init()";
// TODO(panyx0718): Should CPU vs GPU device be decided by id?
if (config_.device >= 0) {
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
} else {
place_ = paddle::platform::CPUPlace();
......@@ -85,11 +84,13 @@ bool NativePaddlePredictor::Init() {
}
ctx_ = executor_->Prepare(*inference_program_, 0);
// Create variables
// TODO(panyx0718): Why need to test share_variables here?
if (config_.share_variables) {
executor_->CreateVariables(*inference_program_, scope_.get(), 0);
}
// Create temporary variables first, so that the first batch does not need to
// create variables at runtime. This is the logic of the old inference
// API.
// TODO(Superjomn) this should be modified when `Clone` is valid for
// multi-threaded applications.
executor_->CreateVariables(*inference_program_, scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
fetch_target_names_ = inference_program_->GetFetchTargetNames();
......@@ -124,7 +125,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
scope_.get(),
&feed_targets,
&fetch_targets,
!config_.share_variables);
false /* don't create variable eatch time */);
if (!GetFetch(fetchs, output_data)) {
LOG(ERROR) << "fail to get fetchs";
return false;
......@@ -242,11 +243,14 @@ bool NativePaddlePredictor::GetFetch(
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE(
config.fraction_of_gpu_memory > 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
......
......@@ -47,7 +47,6 @@ NativeConfig GetConfig() {
config.fraction_of_gpu_memory = 0.15;
config.use_gpu = true;
config.device = 0;
config.share_variables = true;
return config;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册