Commit d64b4ac5 authored by zhangjun

ARM inference: use the Paddle Inference 2.0 API

Parent ff2aff64
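The change swaps the legacy paddle::AnalysisConfig / paddle::CreatePaddlePredictor calls for the paddle_infer API from Paddle Inference 2.0. Below is a minimal sketch of the 2.0-style predictor creation that the refactored cores in this diff follow; the MakePredictor helper and the "./model" path are illustrative assumptions, not code from this commit.

#include <memory>
#include "paddle_inference_api.h"  // Paddle Inference 2.0 C++ header

// Sketch: create a predictor the 2.0 way (Config + CreatePredictor).
std::shared_ptr<paddle_infer::Predictor> MakePredictor() {
  paddle_infer::Config config;
  config.SetModel("./model");            // illustrative model directory
  config.DisableGpu();                   // the ARM/Lite path does not use CUDA
  config.SwitchSpecifyInputNames(true);
  return paddle_infer::CreatePredictor(config);
}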
@@ -58,7 +58,7 @@ else()
endif()
if(WITH_LITE)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
else()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
@@ -13,13 +13,13 @@
# limitations under the License
if (NOT CLIENT_ONLY)
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
endif()
@@ -28,8 +28,6 @@ namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
using configure::SigmoidConf;
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
@@ -57,31 +55,37 @@ class GlobalPaddleCreateMutex {
pthread_mutex_t _mut;
};
class GlobalSigmoidCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalSigmoidCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
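With the 2.0 API, Run() takes no arguments; data moves through the named tensor handles exposed above instead of std::vector<paddle::PaddleTensor> in/out parameters. A minimal usage sketch follows; the RunOnce helper, the input shape, and the float dtype are illustrative assumptions rather than code from this repository.

#include <vector>
#include "paddle_inference_api.h"

// Sketch: drive the handle-based interface that FluidFamilyCore now wraps.
void RunOnce(paddle_infer::Predictor* predictor) {
  std::vector<float> in_data(1 * 3 * 224 * 224, 0.f);  // assumed shape/dtype
  auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(in_data.data());   // host -> predictor

  predictor->Run();                     // no tensor arguments in 2.0

  auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  int num = 1;
  for (int d : output->shape()) num *= d;
  std::vector<float> out_data(num);
  output->CopyToCpu(out_data.data());   // predictor -> host
}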
@@ -92,8 +96,7 @@ class FluidFamilyCore {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
paddle::PaddlePredictor* p_predictor =
(paddle::PaddlePredictor*)origin_core;
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
@@ -105,7 +108,7 @@ class FluidFamilyCore {
virtual void* get() { return _core.get(); }
protected:
std::unique_ptr<paddle::PaddlePredictor> _core;
std::shared_ptr<Predictor> _core;
};
// infer interface
@@ -119,67 +122,37 @@ class FluidArmAnalysisCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetParamsFile(data_path + "/__params__");
analysis_config.SetProgFile(data_path + "/__model__");
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
if (params.use_lite()) {
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
analysis_config.EnableXpu(100);
config.EnableXpu(100);
}
analysis_config.SwitchSpecifyInputNames(true);
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmAnalysisCore] create paddle predictor sucess, path: " << data_path;
params.dump();
return 0;
}
};
class FluidArmNativeCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.param_file = data_path + "/__params__";
native_config.prog_file = data_path + "/__model__";
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmNativeCore] create paddle predictor sucess, path: " << data_path;
params.dump();
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
@@ -194,63 +167,32 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
config.SwitchIrOptim(false);
}
if (params.use_lite()) {
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
analysis_config.EnableXpu(100);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmAnalysisDirCore] create paddle predictor sucess, path: " << data_path;
params.dump();
return 0;
}
};
class FluidArmNativeDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
config.EnableXpu(100);
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
@@ -342,214 +284,6 @@ class Parameter {
float* _params;
};
class SigmoidModel {
public:
~SigmoidModel() {}
int load(const char* sigmoid_w_file,
const char* sigmoid_b_file,
float exp_max,
float exp_min) {
AutoLock lock(GlobalSigmoidCreateMutex::instance());
if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
LOG(ERROR) << "load params sigmoid_w failed.";
return -1;
}
VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
<< _sigmoid_w._params[1] << "].";
if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
LOG(ERROR) << "load params sigmoid_b failed.";
return -1;
}
VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
<< _sigmoid_b._params[1] << "].";
_exp_max_input = exp_max;
_exp_min_input = exp_min;
return 0;
}
int softmax(float x, double& o) { // NOLINT
float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
_y0 = (_y0 > _exp_max_input)
? _exp_max_input
: ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
_y1 = (_y1 > _exp_max_input)
? _exp_max_input
: ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
o = 1.0f / (1.0f + exp(_y0 - _y1));
return 0;
}
public:
Parameter _sigmoid_w;
Parameter _sigmoid_b;
float _exp_max_input;
float _exp_min_input;
};
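For reference, SigmoidModel::softmax() clamps both linear scores to [_exp_min_input, _exp_max_input] and then returns the two-class softmax probability of the second score. Written out (this restates the code above, it is not new behaviour):

o = \frac{1}{1 + e^{y_0 - y_1}} = \frac{e^{y_1}}{e^{y_0} + e^{y_1}}, \qquad y_i = \mathrm{clamp}(w_i x + b_i)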
class SigmoidFluidModel {
public:
int softmax(float x, double& o) { // NOLINT
return _sigmoid_core->softmax(x, o);
} // NOLINT
std::unique_ptr<SigmoidFluidModel> Clone() {
std::unique_ptr<SigmoidFluidModel> clone_model;
clone_model.reset(new SigmoidFluidModel());
clone_model->_sigmoid_core = _sigmoid_core;
clone_model->_fluid_core = _fluid_core->Clone();
return std::move(clone_model); // NOLINT
}
public:
std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
std::shared_ptr<SigmoidModel> _sigmoid_core;
};
class FluidArmWithSigmoidCore : public FluidFamilyCore {
public:
virtual ~FluidArmWithSigmoidCore() {}
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string model_path = params.get_path();
size_t pos = model_path.find_last_of("/\\");
std::string conf_path = model_path.substr(0, pos);
std::string conf_file = model_path.substr(pos);
configure::SigmoidConf conf;
if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
LOG(ERROR) << "failed load model path: " << model_path;
return -1;
}
_core.reset(new SigmoidFluidModel);
std::string fluid_model_data_path = conf.dnn_model_path();
predictor::InferEngineCreationParams new_params(params);
new_params.set_path(fluid_model_data_path);
int ret = load_fluid_model(new_params);
if (ret < 0) {
LOG(ERROR) << "fail to load fluid model.";
return -1;
}
const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
float exp_max = conf.exp_max_input();
float exp_min = conf.exp_min_input();
_core->_sigmoid_core.reset(new SigmoidModel);
VLOG(2) << "create sigmoid core[" << _core->_sigmoid_core.get()
<< "], use count[" << _core->_sigmoid_core.use_count() << "].";
ret = _core->_sigmoid_core->load(
sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
if (ret < 0) {
LOG(ERROR) << "fail to load sigmoid model.";
return -1;
}
return 0;
}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->_fluid_core->Run(
*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int clone(SigmoidFluidModel* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
_core = origin_core->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
VLOG(2) << "clone sigmoid core[" << _core->_sigmoid_core.get()
<< "] use count[" << _core->_sigmoid_core.use_count() << "].";
return 0;
}
virtual SigmoidFluidModel* get() { return _core.get(); }
virtual int load_fluid_model(
const predictor::InferEngineCreationParams& params) = 0;
int softmax(float x, double& o) { // NOLINT
return _core->_sigmoid_core->softmax(x, o);
}
protected:
std::unique_ptr<SigmoidFluidModel> _core; // NOLINT
};
class FluidArmNativeDirWithSigmoidCore : public FluidArmWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidArmAnalysisDirWithSigmoidCore : public FluidArmWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu
@@ -19,8 +19,6 @@ namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
DEFINE_int32(gpuid, 0, "device id to use");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
@@ -32,28 +30,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmAnalysisDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR_SIGMOID");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmNativeCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmNativeDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmNativeDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE_DIR_SIGMOID");
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu