Unverified commit 97b75027, authored by Yan Chunwei, committed by GitHub

inference API little fix (#11069)

Parent f437c46f
@@ -36,7 +36,7 @@ function(inference_api_test TARGET_NAME TEST_SRC)
     string(REGEX REPLACE "^_$" "" arg "${arg}")
     cc_test(${TARGET_NAME}
             SRCS ${TEST_SRC}
-            DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
+            DEPS paddle_fluid_api paddle_inference_api
             ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
     # TODO(panyx0178): Figure out how to add word2vec and image_classification
     # as deps.
@@ -47,13 +47,9 @@ endfunction(inference_api_test)
 cc_library(paddle_inference_api
-    SRCS paddle_inference_api.cc
+    SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
-cc_library(paddle_inference_api_impl
-    SRCS paddle_inference_api_impl.cc
-    DEPS paddle_inference_api paddle_fluid_api)
 cc_test(test_paddle_inference_api
     SRCS test_paddle_inference_api.cc
     DEPS paddle_inference_api)
......
@@ -45,10 +45,10 @@ struct PaddleTensor {
 };
 /*
- * A simple Inference API for Paddle. Currently this API might just be used by
+ * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
- * TODO(Superjomn) Prepare another API for NLP-related usages.
+ * TODO(Superjomn) Support another API for NLP-related usages.
  */
 class PaddlePredictor {
  public:
  struct Config;
@@ -66,34 +66,38 @@ class PaddlePredictor {
   // be thread-safe.
   virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
 
-  virtual bool InitShared() { return false; }
   // Destroy the Predictor.
   virtual ~PaddlePredictor() {}
 
-  friend std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
-      const PaddlePredictor::Config& config);
+  enum class EngineKind {
+    kNative = -1,  // Use the native Fluid facility.
+    // TODO(Superjomn) support latter.
+    // kAnakin,             // Use Anakin for inference.
+    // kTensorRT,           // Use TensorRT for inference.
+    // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
+    // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+  };
 
   // The common configs for all the predictors.
   struct Config {
-    enum class EngineKind;
     std::string model_dir;  // path to the model directory.
     bool enable_engine{false};  // Enable to execute (part of) the model on
-                                // third-party engines.
-    EngineKind engine_kind{Config::EngineKind::kNone};
-    enum class EngineKind {
-      kNone = -1,          // Use the native Fluid facility.
-      kAnakin,             // Use Anakin for inference.
-      kTensorRT,           // Use TensorRT for inference.
-      kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
-      kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
-    };
   };
 };
 
+struct NativeConfig : public PaddlePredictor::Config {
+  bool use_gpu{false};
+  int device;
+  float fraction_of_gpu_memory;
+  std::string prog_file;
+  std::string param_file;
+  bool share_variables;
+};
+
 // A factory to help create difference predictor.
-template <typename ConfigT>
+template <
+    typename ConfigT,
+    PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
 }  // namespace paddle
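For context, the calling convention introduced by this header change looks roughly like the sketch below (not part of the diff). It is a minimal example based only on the declarations visible here and in the updated test; the include path, model path, and the exact feed tensors are assumptions, and error handling is omitted.

#include <vector>

#include "paddle_inference_api.h"  // header modified by this commit (path assumed)

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // hypothetical model path
  config.use_gpu = false;                           // keep the sketch on CPU

  // EngineKind::kNative is the default template argument, so passing only
  // <NativeConfig> selects the native Fluid predictor, as in the updated test.
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
  if (!predictor) return 1;

  std::vector<paddle::PaddleTensor> inputs, outputs;
  // ... fill `inputs` with feed tensors for the loaded model ...
  return predictor->Run(inputs, &outputs) ? 0 : 1;
}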
@@ -54,7 +54,7 @@ std::string num2str(T a) {
 }
 }  // namespace
 
-bool PaddlePredictorImpl::Init() {
+bool NativePaddlePredictor::Init() {
   VLOG(3) << "Predictor::init()";
   // TODO(panyx0718): Should CPU vs GPU device be decided by id?
@@ -96,8 +96,8 @@ bool PaddlePredictorImpl::Init() {
   return true;
 }
 
-bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
-                              std::vector<PaddleTensor> *output_data) {
+bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
+                                std::vector<PaddleTensor> *output_data) {
   VLOG(3) << "Predictor::predict";
   Timer timer;
   timer.tic();
@@ -133,59 +133,20 @@ bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
   return true;
 }
 
-std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
+std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
   VLOG(3) << "Predictor::clone";
-  std::unique_ptr<PaddlePredictor> cls(new PaddlePredictorImpl(config_));
-  if (!cls->InitShared()) {
-    LOG(ERROR) << "fail to call InitShared";
+  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
+  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init()) {
+    LOG(ERROR) << "fail to call Init";
     return nullptr;
   }
   // fix manylinux compile error.
   return std::move(cls);
 }
 
-// TODO(panyx0718): Consider merge with Init()?
-bool PaddlePredictorImpl::InitShared() {
-  VLOG(3) << "Predictor::init_shared";
-  // 1. Define place, executor, scope
-  if (this->config_.device >= 0) {
-    place_ = platform::CUDAPlace();
-  } else {
-    place_ = platform::CPUPlace();
-  }
-  this->executor_.reset(new framework::Executor(this->place_));
-  this->scope_.reset(new framework::Scope());
-  // Initialize the inference program
-  if (!this->config_.model_dir.empty()) {
-    // Parameters are saved in separate files sited in
-    // the specified `dirname`.
-    this->inference_program_ = inference::Load(
-        this->executor_.get(), this->scope_.get(), this->config_.model_dir);
-  } else if (!this->config_.prog_file.empty() &&
-             !this->config_.param_file.empty()) {
-    // All parameters are saved in a single file.
-    // The file names should be consistent with that used
-    // in Python API `fluid.io.save_inference_model`.
-    this->inference_program_ = inference::Load(this->executor_.get(),
-                                               this->scope_.get(),
-                                               this->config_.prog_file,
-                                               this->config_.param_file);
-  }
-  this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
-  // 3. create variables
-  // TODO(panyx0718): why test share_variables.
-  if (config_.share_variables) {
-    this->executor_->CreateVariables(
-        *this->inference_program_, this->scope_.get(), 0);
-  }
-  // 4. Get the feed_target_names and fetch_target_names
-  this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
-  this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
-  return true;
-}
-
-bool PaddlePredictorImpl::SetFeed(const std::vector<PaddleTensor> &inputs,
-                                  std::vector<framework::LoDTensor> *feeds) {
+bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
+                                    std::vector<framework::LoDTensor> *feeds) {
   VLOG(3) << "Predictor::set_feed";
   if (inputs.size() != feed_target_names_.size()) {
     LOG(ERROR) << "wrong feed input size.";
@@ -213,7 +174,7 @@ bool PaddlePredictorImpl::SetFeed(const std::vector<PaddleTensor> &inputs,
   return true;
 }
 
-bool PaddlePredictorImpl::GetFetch(
+bool NativePaddlePredictor::GetFetch(
     const std::vector<framework::LoDTensor> &fetchs,
     std::vector<PaddleTensor> *outputs) {
   VLOG(3) << "Predictor::get_fetch";
@@ -280,23 +241,26 @@ bool PaddlePredictorImpl::GetFetch(
 }
 
 template <>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
-    const ConfigImpl &config) {
-  VLOG(3) << "create PaddlePredictorImpl";
-  // 1. GPU memeroy
-  std::vector<std::string> flags;
-  if (config.fraction_of_gpu_memory >= 0.0f ||
-      config.fraction_of_gpu_memory <= 0.95f) {
-    flags.push_back("dummpy");
-    std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                       num2str<float>(config.fraction_of_gpu_memory);
-    flags.push_back(flag);
-    VLOG(3) << "set flag: " << flag;
-    framework::InitGflags(flags);
+std::unique_ptr<PaddlePredictor>
+CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
+    const NativeConfig &config) {
+  VLOG(3) << "create NativePaddlePredictor";
+  if (config.use_gpu) {
+    // 1. GPU memeroy
+    std::vector<std::string> flags;
+    if (config.fraction_of_gpu_memory >= 0.0f ||
+        config.fraction_of_gpu_memory <= 0.95f) {
+      flags.push_back("dummpy");
+      std::string flag = "--fraction_of_gpu_memory_to_use=" +
+                         num2str<float>(config.fraction_of_gpu_memory);
+      flags.push_back(flag);
+      VLOG(3) << "set flag: " << flag;
+      framework::InitGflags(flags);
+    }
   }
-  std::unique_ptr<PaddlePredictor> predictor(new PaddlePredictorImpl(config));
-  if (!dynamic_cast<PaddlePredictorImpl *>(predictor.get())->Init()) {
+  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
+  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init()) {
     return nullptr;
   }
   return std::move(predictor);
......
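The factory change above specializes a function template whose second parameter is a non-type enum with a default value, so callers can write CreatePaddlePredictor<NativeConfig>(config) and still reach the kNative specialization. The following stand-alone C++14 sketch illustrates that pattern with generic names (Predictor, NativePredictor, CreatePredictor are illustrative, not Paddle's):

#include <iostream>
#include <memory>

struct Predictor { virtual ~Predictor() = default; };
struct NativePredictor : Predictor {};

enum class EngineKind { kNative = -1 };

struct NativeConfig { bool use_gpu{false}; };

// Primary template: declared but not defined, so unsupported
// <Config, Engine> combinations fail at link time.
template <typename ConfigT, EngineKind engine = EngineKind::kNative>
std::unique_ptr<Predictor> CreatePredictor(const ConfigT& config);

// Specialization picked for <NativeConfig, kNative>, the default engine.
template <>
std::unique_ptr<Predictor> CreatePredictor<NativeConfig, EngineKind::kNative>(
    const NativeConfig& config) {
  return std::make_unique<NativePredictor>();
}

int main() {
  NativeConfig config;
  auto p = CreatePredictor<NativeConfig>(config);  // engine defaults to kNative
  std::cout << std::boolalpha << (p != nullptr) << "\n";
  return 0;
}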
@@ -29,17 +29,10 @@
 namespace paddle {
 
-struct ConfigImpl : public PaddlePredictor::Config {
-  int device;
-  float fraction_of_gpu_memory;
-  std::string prog_file;
-  std::string param_file;
-  bool share_variables;
-};
-
-class PaddlePredictorImpl : public PaddlePredictor {
+class NativePaddlePredictor : public PaddlePredictor {
  public:
-  explicit PaddlePredictorImpl(const ConfigImpl &config) : config_(config) {}
+  explicit NativePaddlePredictor(const NativeConfig &config)
+      : config_(config) {}
 
   bool Init();
@@ -48,16 +41,15 @@ class PaddlePredictorImpl : public PaddlePredictor {
   std::unique_ptr<PaddlePredictor> Clone() override;
 
-  ~PaddlePredictorImpl() override{};
+  ~NativePaddlePredictor() override{};
 
  private:
-  bool InitShared() override;
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
                std::vector<framework::LoDTensor> *feeds);
   bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
                 std::vector<PaddleTensor> *output_data);
 
-  ConfigImpl config_;
+  NativeConfig config_;
   platform::Place place_;
   std::unique_ptr<framework::Executor> executor_;
   std::unique_ptr<framework::Scope> scope_;
......
@@ -40,19 +40,20 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   return pt;
 }
 
-ConfigImpl GetConfig() {
-  ConfigImpl config;
+NativeConfig GetConfig() {
+  NativeConfig config;
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
+  config.use_gpu = true;
   config.device = 0;
   config.share_variables = true;
   return config;
 }
 
 TEST(paddle_inference_api_impl, word2vec) {
-  ConfigImpl config = GetConfig();
-  std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
+  NativeConfig config = GetConfig();
+  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -104,7 +105,7 @@ TEST(paddle_inference_api_impl, image_classification) {
   int batch_size = 2;
   bool use_mkldnn = false;
   bool repeat = false;
-  ConfigImpl config = GetConfig();
+  NativeConfig config = GetConfig();
   config.model_dir =
       FLAGS_dirname + "image_classification_resnet.inference.model";
@@ -133,7 +134,7 @@ TEST(paddle_inference_api_impl, image_classification) {
                                       is_combined,
                                       use_mkldnn);
 
-  std::unique_ptr<PaddlePredictor> predictor = CreatePaddlePredictor(config);
+  auto predictor = CreatePaddlePredictor(config);
   std::vector<PaddleTensor> paddle_tensor_feeds;
   paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));
......
@@ -5,14 +5,19 @@ cc_library(paddle_fluid_api
     SRCS io.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
-# Create static library
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
-cc_library(paddle_fluid DEPS ${fluid_modules})
+if(WITH_CONTRIB)
+  set(fluid_modules "${fluid_modules}" paddle_inference_api)
+endif()
+
+# Create static library
+cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)
 
 # Create shared library
 cc_library(paddle_fluid_shared SHARED
     SRCS io.cc
-    DEPS ${fluid_modules})
+    DEPS ${fluid_modules} paddle_fluid_api)
 
 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 if(NOT APPLE)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
......