From 18d640255efb6807a360c29d6e1c672aa679818a Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Fri, 1 Jun 2018 15:38:45 +0800
Subject: [PATCH] simplify inference api (#11104)

---
 .../contrib/inference/paddle_inference_api.h  | 40 +++++++++++--------
 .../inference/paddle_inference_api_impl.cc    | 22 +++++-----
 .../test_paddle_inference_api_impl.cc         |  1 -
 3 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index b4c7f9bef..5fe839976 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -40,14 +40,23 @@ struct PaddleBuf {
 struct PaddleTensor {
   std::string name;  // variable name.
   std::vector<int> shape;
+  // TODO(Superjomn) for LoD support, add a vector<vector<size_t>> field if needed.
   PaddleBuf data;  // blob of data.
   PaddleDType dtype;
 };
 
+enum class PaddleEngineKind {
+  kNative = 0,  // Use the native Fluid facility.
+  // TODO(Superjomn) support following engines latter.
+  // kAnakin,             // Use Anakin for inference.
+  // kTensorRT,           // Use TensorRT for inference.
+  // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
+  // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+};
+
 /*
  * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
- * TODO(Superjomn) Support another API for NLP-related usages.
  */
 class PaddlePredictor {
  public:
@@ -69,15 +78,6 @@ class PaddlePredictor {
   // Destroy the Predictor.
   virtual ~PaddlePredictor() {}
 
-  enum class EngineKind {
-    kNative = -1,  // Use the native Fluid facility.
-    // TODO(Superjomn) support latter.
-    // kAnakin,             // Use Anakin for inference.
-    // kTensorRT,           // Use TensorRT for inference.
-    // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
-    // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
-  };
-
   // The common configs for all the predictors.
   struct Config {
     std::string model_dir;  // path to the model directory.
@@ -86,18 +86,24 @@ class PaddlePredictor {
 };
 
 struct NativeConfig : public PaddlePredictor::Config {
+  // GPU related fields.
   bool use_gpu{false};
-  int device;
-  float fraction_of_gpu_memory;
+  int device{0};
+  float fraction_of_gpu_memory{-1.f};  // Negative to notify initialization.
+
   std::string prog_file;
   std::string param_file;
-  bool share_variables;
 };
 
-// A factory to help create difference predictor.
-template <
-    typename ConfigT,
-    PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
+// A factory to help create different predictors.
+//
+// FOR EXTENSION DEVELOPER:
+// Different predictors are designated by config type and engine kind. Similar
+// configs can be merged, but there shouldn't be a huge config containing
+// different fields for more than one kind of predictors.
+//
+// Similarly, each engine kind should map to a unique predictor implementation.
+template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
 }  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
index 989252f69..99a64662d 100644
--- a/paddle/contrib/inference/paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -57,8 +57,7 @@ std::string num2str(T a) {
 bool NativePaddlePredictor::Init() {
   VLOG(3) << "Predictor::init()";
 
-  // TODO(panyx0718): Should CPU vs GPU device be decided by id?
-  if (config_.device >= 0) {
+  if (config_.use_gpu) {
     place_ = paddle::platform::CUDAPlace(config_.device);
   } else {
     place_ = paddle::platform::CPUPlace();
@@ -85,11 +84,13 @@ bool NativePaddlePredictor::Init() {
   }
   ctx_ = executor_->Prepare(*inference_program_, 0);
 
-  // Create variables
-  // TODO(panyx0718): Why need to test share_variables here?
-  if (config_.share_variables) {
-    executor_->CreateVariables(*inference_program_, scope_.get(), 0);
-  }
+  // Create temporary variables first, so that the first batch do not need to
+  // create variables in the runtime. This is the logics of the old inference
+  // API.
+  // TODO(Superjomn) this should be modified when `Clone` is valid for
+  // multi-thread application.
+  executor_->CreateVariables(*inference_program_, scope_.get(), 0);
+
   // Get the feed_target_names and fetch_target_names
   feed_target_names_ = inference_program_->GetFeedTargetNames();
   fetch_target_names_ = inference_program_->GetFetchTargetNames();
@@ -124,7 +125,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                 scope_.get(),
                                 &feed_targets,
                                 &fetch_targets,
-                                !config_.share_variables);
+                                false /* don't create variable eatch time */);
   if (!GetFetch(fetchs, output_data)) {
     LOG(ERROR) << "fail to get fetchs";
     return false;
@@ -242,11 +243,14 @@ bool NativePaddlePredictor::GetFetch(
 
 template <>
 std::unique_ptr<PaddlePredictor>
-CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
+CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
     const NativeConfig &config) {
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
+    PADDLE_ENFORCE(
+        config.fraction_of_gpu_memory > 0.f,
+        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
     std::vector<std::string> flags;
     if (config.fraction_of_gpu_memory >= 0.0f ||
         config.fraction_of_gpu_memory <= 0.95f) {
diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
index 5240fc2f2..07b17acd4 100644
--- a/paddle/contrib/inference/test_paddle_inference_api_impl.cc
+++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -47,7 +47,6 @@ NativeConfig GetConfig() {
   config.fraction_of_gpu_memory = 0.15;
   config.use_gpu = true;
   config.device = 0;
-  config.share_variables = true;
   return config;
 }
 
--
GitLab
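
A minimal usage sketch of the simplified API after this patch, for reference only. The model path and input filling below are placeholders, and the GPU fields simply mirror GetConfig() from the test above; this snippet is not part of the commit.

#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  // Configure the native engine; values follow the test's GetConfig().
  paddle::NativeConfig config;
  config.model_dir = "path/to/model";    // placeholder model directory
  config.use_gpu = true;
  config.device = 0;
  config.fraction_of_gpu_memory = 0.15;  // must be > 0 when use_gpu is set

  // PaddleEngineKind::kNative is now the default engine template argument.
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  std::vector<paddle::PaddleTensor> inputs, outputs;
  // ... fill `inputs` (name, shape, data, dtype) for the loaded model ...
  if (!predictor->Run(inputs, &outputs)) {
    return 1;  // inference failed
  }
  return 0;
}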