Unverified commit 5de14c6b authored by Yan Chunwei, committed by GitHub

refine inference api (#13518)

Parent 79463ae7
@@ -71,7 +71,7 @@ bool AnalysisPredictor::Init(
     inference_program_ = paddle::inference::Load(
         executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
   } else {
-    LOG(ERROR) << "fail to load inference model.";
+    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
     return false;
   }
@@ -109,7 +109,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
   argument_.origin_program_desc.reset(
       new ProgramDesc(*inference_program_->Proto()));
-  PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude,
-                 "Only kExclude is supported yet.");
+  PADDLE_ENFORCE(
+      config_.ir_mode == contrib::AnalysisConfig::IrPassMode::kExclude,
+      "Only kExclude is supported yet.");
   Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_);
@@ -126,8 +127,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
 }

 template <>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
-    AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) {
+std::unique_ptr<PaddlePredictor>
+CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
+    const contrib::AnalysisConfig& config) {
   VLOG(3) << "create AnalysisConfig";
   if (config.use_gpu) {
     // 1. GPU memeroy
@@ -154,4 +156,11 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   return predictor;
 }

+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
+    const contrib::AnalysisConfig& config) {
+  return CreatePaddlePredictor<contrib::AnalysisConfig,
+                               PaddleEngineKind::kAnalysis>(config);
+}
+
 }  // namespace paddle
@@ -30,7 +30,7 @@ using framework::proto::ProgramDesc;
  */
 class AnalysisPredictor : public NativePaddlePredictor {
  public:
-  explicit AnalysisPredictor(const AnalysisConfig& config)
+  explicit AnalysisPredictor(const contrib::AnalysisConfig& config)
       : NativePaddlePredictor(config), config_(config) {}

   bool Init(const std::shared_ptr<framework::Scope>& parent_scope);
@@ -46,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
   Argument& analysis_argument() { return argument_; }

  private:
-  AnalysisConfig config_;
+  contrib::AnalysisConfig config_;
   Argument argument_;
 };
......
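The new specialization of CreatePaddlePredictor for contrib::AnalysisConfig added in the hunk above lets callers pick the analysis engine from the config type alone. A minimal usage sketch, assuming a hypothetical model directory and the in-repo header path:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  // The model path here is a placeholder; point it at a real inference model.
  paddle::contrib::AnalysisConfig config;
  config.model_dir = "./mobilenet_v1_model";
  config.use_gpu = false;

  // The engine kind (kAnalysis) is deduced from the config type by the new
  // single-template overload; no PaddleEngineKind argument is needed.
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}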
@@ -31,21 +31,24 @@
 namespace paddle {

+using paddle::contrib::AnakinConfig;
+
 template <typename Target>
 PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
-    const AnakinConfig &config) {
+    const contrib::AnakinConfig &config) {
   CHECK(Init(config));
 }
 template <>
 PaddleInferenceAnakinPredictor<anakin::X86>::PaddleInferenceAnakinPredictor(
-    const AnakinConfig &config) {
+    const contrib::AnakinConfig &config) {
   omp_set_dynamic(0);
   omp_set_num_threads(1);
   mkl_set_num_threads(1);
   CHECK(Init(config));
 }
 template <typename Target>
-bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
+bool PaddleInferenceAnakinPredictor<Target>::Init(
+    const contrib::AnakinConfig &config) {
   if (!(graph_.load(config.model_file))) {
     VLOG(3) << "fail to load graph from " << config.model_file;
     return false;
@@ -200,10 +203,11 @@ template class PaddleInferenceAnakinPredictor<anakin::X86>;
 // A factory to help create difference predictor.
 template <>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
-    AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
+std::unique_ptr<PaddlePredictor>
+CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
+    const contrib::AnakinConfig &config) {
   VLOG(3) << "Anakin Predictor create.";
-  if (config.target_type == AnakinConfig::NVGPU) {
+  if (config.target_type == contrib::AnakinConfig::NVGPU) {
 #ifdef PADDLE_WITH_CUDA
     VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
     std::unique_ptr<PaddlePredictor> x(
@@ -213,7 +217,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment";
     return nullptr;
 #endif
-  } else if (config.target_type == AnakinConfig::X86) {
+  } else if (config.target_type == contrib::AnakinConfig::X86) {
     VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
     std::unique_ptr<PaddlePredictor> x(
         new PaddleInferenceAnakinPredictor<anakin::X86>(config));
......
@@ -29,6 +29,8 @@ limitations under the License. */
 namespace paddle {

+using contrib::AnakinConfig;
+
 template <typename Target>
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
......
@@ -101,14 +101,11 @@ bool NativePaddlePredictor::Init(
     inference_program_ = paddle::inference::Load(
         executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
   } else {
-    LOG(ERROR) << "fail to load inference model.";
+    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
     return false;
   }

   ctx_ = executor_->Prepare(*inference_program_, 0);
-  if (config_._use_mkldnn) {
-    executor_->EnableMKLDNN(*inference_program_);
-  }
   executor_->CreateVariables(*inference_program_,
                              sub_scope_ ? sub_scope_ : scope_.get(), 0);
@@ -330,4 +327,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
 #endif
 }

+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
+    const NativeConfig &config) {
+  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+}
+
 }  // namespace paddle
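The NativeConfig overload added above keeps the old two-parameter call working while allowing a shorter config-only form. A sketch of the equivalence, assuming a hypothetical word2vec model directory:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void CreateNativePredictors() {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // placeholder path
  config.use_gpu = false;

  // Old style: the engine kind is spelled out explicitly.
  auto p1 = paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                          paddle::PaddleEngineKind::kNative>(config);
  // New style: the config-only overload forwards to the same implementation.
  auto p2 = paddle::CreatePaddlePredictor(config);
}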
@@ -25,10 +25,11 @@ using inference::analysis::Argument;
 using inference::Singleton;
 using inference::analysis::Analyzer;
 using framework::proto::ProgramDesc;
+using paddle::contrib::MixedRTConfig;

 class TensorRTSubgraphPredictor : public NativePaddlePredictor {
  public:
-  explicit TensorRTSubgraphPredictor(const TensorRTConfig& config)
+  explicit TensorRTSubgraphPredictor(const MixedRTConfig& config)
       : NativePaddlePredictor(config), config_(config) {}

   bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
@@ -115,13 +116,13 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
   }

  private:
-  TensorRTConfig config_;
+  MixedRTConfig config_;
 };

 template <>
 std::unique_ptr<PaddlePredictor>
-CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
-    const TensorRTConfig& config) {
+CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
+    const MixedRTConfig& config) {
   VLOG(3) << "create TensorRTSubgraphPredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
@@ -150,6 +151,13 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
   return std::move(predictor);
 }

+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<MixedRTConfig>(
+    const MixedRTConfig& config) {
+  return CreatePaddlePredictor<MixedRTConfig,
+                               PaddleEngineKind::kAutoMixedTensorRT>(config);
+}
+
 }  // namespace paddle

 USE_TRT_CONVERTER(elementwise_add_weight);
......
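With TensorRTConfig renamed to contrib::MixedRTConfig, the mixed Fluid/TensorRT predictor can also be created through the config-only overload added above. A sketch under placeholder paths and GPU settings:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::contrib::MixedRTConfig config;
  config.model_dir = "./word2vec.inference.model";  // placeholder path
  config.use_gpu = true;
  config.fraction_of_gpu_memory = 0.3;
  config.device = 0;
  config.min_subgraph_size = 1;  // subgraphs at least this large go to TRT

  // Equivalent to calling the explicit form
  // CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(config).
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}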
@@ -20,6 +20,8 @@
 namespace paddle {

+using contrib::MixedRTConfig;
+
 DEFINE_string(dirname, "", "Directory of the inference model.");

 void CompareTensorRTWithFluid(bool enable_tensorrt) {
@@ -32,7 +34,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
   config0.fraction_of_gpu_memory = 0.3;
   config0.device = 0;

-  TensorRTConfig config1;
+  MixedRTConfig config1;
   config1.model_dir = FLAGS_dirname + "word2vec.inference.model";
   config1.use_gpu = true;
   config1.fraction_of_gpu_memory = 0.3;
@@ -42,7 +44,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
   auto predictor0 =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
   auto predictor1 =
-      CreatePaddlePredictor<TensorRTConfig,
+      CreatePaddlePredictor<MixedRTConfig,
                             PaddleEngineKind::kAutoMixedTensorRT>(config1);

   for (int batch_id = 0; batch_id < 1; batch_id++) {
......
@@ -28,34 +28,61 @@ limitations under the License. */
 namespace paddle {

+// Data type.
 enum PaddleDType {
   FLOAT32,
   INT64,
+  // TODO(Superjomn) support more data types if needed.
 };

+/*
+ * Memory menage for PaddleTensor.
+ * The PaddleBuf holds a buffer for data input or output. The memory can be
+ * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
+ * should be reused for better performance.
+ *
+ * For user allocated memory, the following API can be used:
+ * - PaddleBuf(void* data, size_t length) to set an external memory by
+ * specifying
+ *   the memory address and length.
+ * - Reset(void* data, size_t length) to reset the PaddleBuf with an external
+ * memory.
+ * ATTENTION, for user allocated memory, deallocation should be done by users
+ * externally after the program finished. The PaddleBuf won't do any allocation
+ * or deallocation.
+ *
+ * To have the PaddleBuf allocate and manage the memory:
+ * - PaddleBuf(size_t length) will allocate a memory of size `length`.
+ * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
+ *   if the allocated memory is larger than `length`, nothing will done.
+ */
 class PaddleBuf {
  public:
-  PaddleBuf() = default;
-  PaddleBuf(PaddleBuf&& other);
-  // Copy only available when memory is managed externally.
-  explicit PaddleBuf(const PaddleBuf&);
-  PaddleBuf& operator=(const PaddleBuf&);
-  PaddleBuf& operator=(PaddleBuf&&);
-  // Do not own the memory.
-  PaddleBuf(void* data, size_t length)
-      : data_(data), length_(length), memory_owned_{false} {}
-  // Own memory.
+  // PaddleBuf allocate memory internally, and manage it.
   explicit PaddleBuf(size_t length)
       : data_(new char[length]), length_(length), memory_owned_(true) {}
-  // Resize to `length` bytes.
+  // Set external memory, the PaddleBuf won't manage it.
+  PaddleBuf(void* data, size_t length)
+      : data_(data), length_(length), memory_owned_{false} {}
+  // Copy only available when memory is managed externally.
+  explicit PaddleBuf(const PaddleBuf&);
+
+  // Resize the memory.
   void Resize(size_t length);
-  // Reset to external memory.
+  // Reset to external memory, with address and length set.
   void Reset(void* data, size_t length);
+  // Tell whether the buffer is empty.
   bool empty() const { return length_ == 0; }
+  // Get the memory address.
   void* data() const { return data_; }
+  // Get the memory length.
   size_t length() const { return length_; }

   ~PaddleBuf() { Free(); }
+  PaddleBuf& operator=(const PaddleBuf&);
+  PaddleBuf& operator=(PaddleBuf&&);
+  PaddleBuf() = default;
+  PaddleBuf(PaddleBuf&& other);

  private:
   void Free();
@@ -64,6 +91,7 @@ class PaddleBuf {
   bool memory_owned_{true};
 };
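A short sketch of the two ownership modes documented in the new PaddleBuf comment block above; the sizes here are arbitrary:

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void PaddleBufOwnershipDemo() {
  // Internally managed: PaddleBuf allocates 1024 bytes and frees them itself.
  paddle::PaddleBuf owned(1024);
  owned.Resize(2048);  // grows to at least 2048 bytes; a smaller request is a no-op

  // Externally managed: the buffer only points at user memory and never frees it.
  std::vector<float> input(256, 0.f);
  paddle::PaddleBuf external(input.data(), input.size() * sizeof(float));
  external.Reset(input.data(), input.size() * sizeof(float));  // rebind external memory
}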
+// Basic input and output data structure for PaddlePredictor.
 struct PaddleTensor {
   PaddleTensor() = default;
   std::string name;  // variable name.
@@ -73,19 +101,8 @@ struct PaddleTensor {
   std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
 };

-enum class PaddleEngineKind {
-  kNative = 0,         // Use the native Fluid facility.
-  kAnakin,             // Use Anakin for inference.
-  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
-  kAnalysis
-  // TODO(Superjomn) support following engines latter.
-  // kTensorRT, // Use TensorRT for inference.
-  // kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
-};
-
 /*
- * A simple Inference API for Paddle. Currently this API can be used by
- * non-sequence scenerios.
+ * A simple Inference API for Paddle.
  */
 class PaddlePredictor {
  public:
@@ -120,26 +137,53 @@ struct NativeConfig : public PaddlePredictor::Config {
   // GPU related fields.
   bool use_gpu{false};
   int device{0};
-  float fraction_of_gpu_memory{-1.f};  // Negative to notify initialization.
-  // NOTE: NOT use it, just for the internal test, will discard later
-  bool _use_mkldnn{false};
-  // Specify the variable's name of each input.
-  bool specify_input_name{false};
+  float fraction_of_gpu_memory{-1.f};  // Change to a float in (0,1] if needed.

+  // Specify the exact path of program and parameter files.
   std::string prog_file;
   std::string param_file;
+
+  // Specify the variable's name of each input if input tensors don't follow the
+  // `feeds` and `fetches` of the phase `save_inference_model`.
+  bool specify_input_name{false};
 };

-// Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
-  enum TargetType { NVGPU = 0, X86 };
-  int device;
-  std::string model_file;
-  int max_batch_size{-1};
-  TargetType target_type;
+// A factory to help create different predictors.
+//
+// Usage:
+//
+// NativeConfig config;
+// ... // change the configs.
+// auto native_predictor = CreatePaddlePredictor(config);
+//
+// FOR EXTENSION DEVELOPER:
+// Different predictors are designated by config type. Similar configs can be
+// merged, but there shouldn't be a huge config containing different fields for
+// more than one kind of predictors.
+template <typename ConfigT>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
+// NOTE The following APIs are too trivial, we will discard it in the following
+// versions.
+enum class PaddleEngineKind {
+  kNative = 0,         // Use the native Fluid facility.
+  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+  kAnalysis,           // More optimization.
+  kAnakin              // Use Anakin for inference, not mature yet.
 };

-struct TensorRTConfig : public NativeConfig {
+template <typename ConfigT, PaddleEngineKind engine>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
+// ==
+//
+// -----------------------------------------------------------------------------------
+// NOTE: The following APIs are not mature yet, we are still working on them.
+
+namespace contrib {
+
+// Accelerate GPU computation with TensorRT engine.
+struct MixedRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
   // While TensorRT allows an engine optimized for a given max batch size
@@ -154,7 +198,6 @@ struct TensorRTConfig : public NativeConfig {

 // NOTE WIP, not stable yet.
 struct AnalysisConfig : public NativeConfig {
-  //
   enum class IrPassMode {
     kSystem,   // Use system default passes, not customize.
     kInclude,  // Specify the passes in `ir_passes`.
@@ -165,18 +208,21 @@ struct AnalysisConfig : public NativeConfig {
   IrPassMode ir_mode{IrPassMode::kExclude};
   // attention lstm fuse works only on some specific models, disable as default.
   std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};
+
+  // NOTE this is just for internal development, please not use it.
+  bool _use_mkldnn{false};
 };

-// A factory to help create different predictors.
-//
-// FOR EXTENSION DEVELOPER:
-// Different predictors are designated by config type and engine kind. Similar
-// configs can be merged, but there shouldn't be a huge config containing
-// different fields for more than one kind of predictors.
-//
-// Similarly, each engine kind should map to a unique predictor implementation.
-template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+// Configurations for Anakin engine.
+struct AnakinConfig : public PaddlePredictor::Config {
+  enum TargetType { NVGPU = 0, X86 };
+  int device;
+  std::string model_file;
+  int max_batch_size{-1};
+  TargetType target_type;
+};
+
+}  // namespace contrib

 int PaddleDtypeSize(PaddleDType dtype);
......
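Putting the refreshed header together, the sketch below wraps externally owned data in a PaddleTensor and runs a native predictor through the simplified factory; the model path and feed name are placeholders:

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // placeholder path
  config.use_gpu = false;
  auto predictor = paddle::CreatePaddlePredictor(config);
  if (!predictor) return 1;

  // Wrap input ids in a PaddleTensor; the PaddleBuf refers to external memory.
  std::vector<int64_t> ids = {1, 2, 3, 4};
  paddle::PaddleTensor input;
  input.name = "ids";  // hypothetical feed variable name
  input.shape = {4, 1};
  input.dtype = paddle::PaddleDType::INT64;
  input.data = paddle::PaddleBuf(ids.data(), ids.size() * sizeof(int64_t));

  std::vector<paddle::PaddleTensor> outputs;
  if (!predictor->Run({input}, &outputs)) return 1;
  return outputs.empty() ? 1 : 0;
}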
@@ -22,10 +22,10 @@ DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
 namespace paddle {

-AnakinConfig GetConfig() {
-  AnakinConfig config;
+contrib::AnakinConfig GetConfig() {
+  contrib::AnakinConfig config;
   // using AnakinConfig::X86 if you need to use cpu to do inference
-  config.target_type = AnakinConfig::NVGPU;
+  config.target_type = contrib::AnakinConfig::NVGPU;
   config.model_file = FLAGS_model;
   config.device = 0;
   config.max_batch_size = 1;
@@ -33,9 +33,10 @@ AnakinConfig GetConfig() {
 }

 TEST(inference, anakin) {
-  AnakinConfig config = GetConfig();
+  auto config = GetConfig();
   auto predictor =
-      CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
+      CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
+          config);

   float data[1 * 3 * 224 * 224] = {1.0f};
   PaddleTensor tensor;
......
@@ -97,10 +97,10 @@ void Data::get_batch_data(
 namespace paddle {

-AnakinConfig GetConfig() {
-  AnakinConfig config;
+contrib::AnakinConfig GetConfig() {
+  contrib::AnakinConfig config;
   // using AnakinConfig::X86 if you need to use cpu to do inference
-  config.target_type = AnakinConfig::X86;
+  config.target_type = contrib::AnakinConfig::X86;
   config.model_file = FLAGS_model;
   config.device = 0;
   config.max_batch_size = 1000;  // the max number of token
@@ -121,9 +121,10 @@ void set_tensor(std::string name, std::vector<int> shape,
 }

 void single_test() {
-  AnakinConfig config = GetConfig();
+  auto config = GetConfig();
   auto predictor =
-      CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
+      CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
+          config);

   int max_batch_size = 1000;
   std::string feature_file = FLAGS_datapath;
......
@@ -95,7 +95,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }

-void SetConfig(AnalysisConfig *cfg) {
+void SetConfig(contrib::AnalysisConfig *cfg) {
   cfg->prog_file = FLAGS_infer_model + "/__model__";
   cfg->param_file = FLAGS_infer_model + "/param";
   cfg->use_gpu = false;
@@ -117,7 +117,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

 // Easy for profiling independently.
 TEST(Analyzer_Chinese_ner, profile) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
   std::vector<PaddleTensor> outputs;
@@ -141,7 +141,7 @@ TEST(Analyzer_Chinese_ner, profile) {

 // Check the fuse status
 TEST(Analyzer_Chinese_ner, fuse_statis) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);

   int num_ops;
@@ -155,7 +155,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {

 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_Chinese_ner, compare) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);

   std::vector<std::vector<PaddleTensor>> input_slots_all;
......
@@ -149,7 +149,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }

-void SetConfig(AnalysisConfig *cfg) {
+void SetConfig(contrib::AnalysisConfig *cfg) {
   cfg->prog_file = FLAGS_infer_model + "/__model__";
   cfg->param_file = FLAGS_infer_model + "/param";
   cfg->use_gpu = false;
@@ -172,7 +172,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

 // Easy for profiling independently.
 TEST(Analyzer_rnn1, profile) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
   std::vector<PaddleTensor> outputs;
@@ -183,7 +183,7 @@ TEST(Analyzer_rnn1, profile) {

 // Check the fuse status
 TEST(Analyzer_rnn1, fuse_statis) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);

   int num_ops;
@@ -198,7 +198,7 @@ TEST(Analyzer_rnn1, fuse_statis) {

 // Compare result of NativeConfig and AnalysisConfig
 TEST(Analyzer_rnn1, compare) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);

   std::vector<std::vector<PaddleTensor>> input_slots_all;
@@ -208,7 +208,7 @@ TEST(Analyzer_rnn1, compare) {

 // Test Multi-Thread.
 TEST(Analyzer_rnn1, multi_thread) {
-  AnalysisConfig cfg;
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
   std::vector<PaddleTensor> outputs;
......
@@ -38,6 +38,8 @@ DEFINE_bool(use_analysis, true,
 namespace paddle {
 namespace inference {

+using contrib::AnalysisConfig;
+
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {
   EXPECT_GT(outputs.size(), 0UL);
@@ -77,8 +79,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
 std::unique_ptr<PaddlePredictor> CreateTestPredictor(
     const AnalysisConfig &config, bool use_analysis = true) {
   if (use_analysis) {
-    return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-        config);
+    return CreatePaddlePredictor<contrib::AnalysisConfig,
+                                 PaddleEngineKind::kAnalysis>(config);
   } else {
     return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
         config);
......