未验证 提交 97b75027 编写于 作者: Y Yan Chunwei 提交者: GitHub

inference API little fix (#11069)

上级 f437c46f
......@@ -36,7 +36,7 @@ function(inference_api_test TARGET_NAME TEST_SRC)
string(REGEX REPLACE "^_$" "" arg "${arg}")
cc_test(${TARGET_NAME}
SRCS ${TEST_SRC}
DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
DEPS paddle_fluid_api paddle_inference_api
ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
# TODO(panyx0178): Figure out how to add word2vec and image_classification
# as deps.
......@@ -47,13 +47,9 @@ endfunction(inference_api_test)
cc_library(paddle_inference_api
SRCS paddle_inference_api.cc
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_library(paddle_inference_api_impl
SRCS paddle_inference_api_impl.cc
DEPS paddle_inference_api paddle_fluid_api)
cc_test(test_paddle_inference_api
SRCS test_paddle_inference_api.cc
DEPS paddle_inference_api)
......
......@@ -45,10 +45,10 @@ struct PaddleTensor {
};
/*
* A simple Inference API for Paddle. Currently this API might just be used by
* non-sequence scenerios.
* TODO(Superjomn) Prepare another API for NLP-related usages.
*/
* A simple Inference API for Paddle. Currently this API can be used by
* non-sequence scenarios.
* TODO(Superjomn) Support another API for NLP-related usages.
*/
class PaddlePredictor {
public:
struct Config;
......@@ -66,34 +66,38 @@ class PaddlePredictor {
// be thread-safe.
virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
virtual bool InitShared() { return false; }
// Destroy the Predictor.
virtual ~PaddlePredictor() {}
friend std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
const PaddlePredictor::Config& config);
// Identifies the inference engine a predictor is built on. Only kNative is
// enabled; the remaining enumerators are kept commented out as placeholders.
enum class EngineKind {
kNative = -1, // Use the native Fluid facility.
// TODO(Superjomn) support later.
// kAnakin, // Use Anakin for inference.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
// kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};
// The common configs for all the predictors.
struct Config {
enum class EngineKind;
std::string model_dir; // path to the model directory.
bool enable_engine{false}; // Enable to execute (part of) the model on
// third-party engines.
EngineKind engine_kind{Config::EngineKind::kNone};
enum class EngineKind {
kNone = -1, // Use the native Fluid facility.
kAnakin, // Use Anakin for inference.
kTensorRT, // Use TensorRT for inference.
kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
};
};
};
// Configuration for the native predictor. Extends the common
// PaddlePredictor::Config with the fields below.
// NOTE(review): only `use_gpu` has a default initializer; `device`,
// `fraction_of_gpu_memory` and `share_variables` hold indeterminate values
// unless the caller assigns them, so set every field before use.
struct NativeConfig : public PaddlePredictor::Config {
// When true, CreatePaddlePredictor forwards `fraction_of_gpu_memory` to the
// `--fraction_of_gpu_memory_to_use` flag before building the predictor.
bool use_gpu{false};
// Presumably the GPU device id -- TODO confirm against Init().
int device;
// Value passed to `--fraction_of_gpu_memory_to_use` when `use_gpu` is set.
float fraction_of_gpu_memory;
// Presumably the program / parameter file pair used when `model_dir` is
// empty -- TODO confirm against Init().
std::string prog_file;
std::string param_file;
// NOTE(review): no default; semantics are not visible here -- confirm in Init().
bool share_variables;
};
// A factory to help create different predictors.
template <typename ConfigT>
template <
typename ConfigT,
PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
} // namespace paddle
......@@ -54,7 +54,7 @@ std::string num2str(T a) {
}
} // namespace
bool PaddlePredictorImpl::Init() {
bool NativePaddlePredictor::Init() {
VLOG(3) << "Predictor::init()";
// TODO(panyx0718): Should CPU vs GPU device be decided by id?
......@@ -96,8 +96,8 @@ bool PaddlePredictorImpl::Init() {
return true;
}
bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) {
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) {
VLOG(3) << "Predictor::predict";
Timer timer;
timer.tic();
......@@ -133,59 +133,20 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
return true;
}
std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
VLOG(3) << "Predictor::clone";
std::unique_ptr<PaddlePredictor> cls(new PaddlePredictorImpl(config_));
if (!cls->InitShared()) {
LOG(ERROR) << "fail to call InitShared";
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init()) {
LOG(ERROR) << "fail to call Init";
return nullptr;
}
// fix manylinux compile error.
return std::move(cls);
}
// TODO(panyx0718): Consider merge with Init()?
bool PaddlePredictorImpl::InitShared() {
VLOG(3) << "Predictor::init_shared";
// 1. Define place, executor, scope
if (this->config_.device >= 0) {
place_ = platform::CUDAPlace();
} else {
place_ = platform::CPUPlace();
}
this->executor_.reset(new framework::Executor(this->place_));
this->scope_.reset(new framework::Scope());
// Initialize the inference program
if (!this->config_.model_dir.empty()) {
// Parameters are saved in separate files sited in
// the specified `dirname`.
this->inference_program_ = inference::Load(
this->executor_.get(), this->scope_.get(), this->config_.model_dir);
} else if (!this->config_.prog_file.empty() &&
!this->config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
this->inference_program_ = inference::Load(this->executor_.get(),
this->scope_.get(),
this->config_.prog_file,
this->config_.param_file);
}
this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
// 3. create variables
// TODO(panyx0718): why test share_variables.
if (config_.share_variables) {
this->executor_->CreateVariables(
*this->inference_program_, this->scope_.get(), 0);
}
// 4. Get the feed_target_names and fetch_target_names
this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
return true;
}
bool PaddlePredictorImpl::SetFeed(const std::vector<PaddleTensor> &inputs,
std::vector<framework::LoDTensor> *feeds) {
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
std::vector<framework::LoDTensor> *feeds) {
VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feed_target_names_.size()) {
LOG(ERROR) << "wrong feed input size.";
......@@ -213,7 +174,7 @@ bool PaddlePredictorImpl::SetFeed(const std::vector<PaddleTensor> &inputs,
return true;
}
bool PaddlePredictorImpl::GetFetch(
bool NativePaddlePredictor::GetFetch(
const std::vector<framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *outputs) {
VLOG(3) << "Predictor::get_fetch";
......@@ -280,23 +241,26 @@ bool PaddlePredictorImpl::GetFetch(
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
const ConfigImpl &config) {
VLOG(3) << "create PaddlePredictorImpl";
// 1. GPU memeroy
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
flags.push_back("dummpy");
std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags);
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
flags.push_back("dummpy");
std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags);
}
}
std::unique_ptr<PaddlePredictor> predictor(new PaddlePredictorImpl(config));
if (!dynamic_cast<PaddlePredictorImpl *>(predictor.get())->Init()) {
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init()) {
return nullptr;
}
return std::move(predictor);
......
......@@ -29,17 +29,10 @@
namespace paddle {
struct ConfigImpl : public PaddlePredictor::Config {
int device;
float fraction_of_gpu_memory;
std::string prog_file;
std::string param_file;
bool share_variables;
};
class PaddlePredictorImpl : public PaddlePredictor {
class NativePaddlePredictor : public PaddlePredictor {
public:
explicit PaddlePredictorImpl(const ConfigImpl &config) : config_(config) {}
explicit NativePaddlePredictor(const NativeConfig &config)
: config_(config) {}
bool Init();
......@@ -48,16 +41,15 @@ class PaddlePredictorImpl : public PaddlePredictor {
std::unique_ptr<PaddlePredictor> Clone() override;
~PaddlePredictorImpl() override{};
~NativePaddlePredictor() override{};
private:
bool InitShared() override;
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
std::vector<framework::LoDTensor> *feeds);
bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *output_data);
ConfigImpl config_;
NativeConfig config_;
platform::Place place_;
std::unique_ptr<framework::Executor> executor_;
std::unique_ptr<framework::Scope> scope_;
......
......@@ -40,19 +40,20 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
return pt;
}
ConfigImpl GetConfig() {
ConfigImpl config;
NativeConfig GetConfig() {
NativeConfig config;
config.model_dir = FLAGS_dirname + "word2vec.inference.model";
LOG(INFO) << "dirname " << config.model_dir;
config.fraction_of_gpu_memory = 0.15;
config.use_gpu = true;
config.device = 0;
config.share_variables = true;
return config;
}
TEST(paddle_inference_api_impl, word2vec) {
ConfigImpl config = GetConfig();
std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
NativeConfig config = GetConfig();
auto predictor = CreatePaddlePredictor<NativeConfig>(config);
framework::LoDTensor first_word, second_word, third_word, fourth_word;
framework::LoD lod{{0, 1}};
......@@ -104,7 +105,7 @@ TEST(paddle_inference_api_impl, image_classification) {
int batch_size = 2;
bool use_mkldnn = false;
bool repeat = false;
ConfigImpl config = GetConfig();
NativeConfig config = GetConfig();
config.model_dir =
FLAGS_dirname + "image_classification_resnet.inference.model";
......@@ -133,7 +134,7 @@ TEST(paddle_inference_api_impl, image_classification) {
is_combined,
use_mkldnn);
std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> paddle_tensor_feeds;
paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));
......
......@@ -5,14 +5,19 @@ cc_library(paddle_fluid_api
SRCS io.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
# Create static library
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
cc_library(paddle_fluid DEPS ${fluid_modules})
if(WITH_CONTRIB)
set(fluid_modules "${fluid_modules}" paddle_inference_api)
endif()
# Create static library
cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)
# Create shared library
cc_library(paddle_fluid_shared SHARED
SRCS io.cc
DEPS ${fluid_modules})
DEPS ${fluid_modules} paddle_fluid_api)
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
if(NOT APPLE)
# TODO(liuyiqun): Temporarily disable the link flag because it is not supported on Mac.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册