diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index 7c697c8126a57332623fd0c93ccf6c90bdffb0e7..344d12dd0b12a0378f68b3de91c84ca11253c502 100755
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -70,9 +70,9 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_
 if(ANAKIN_FOUND)
   # Do not turn warnings into errors.
   set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error")
-  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api anakin anakin_saber_common)
-  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
   function(anakin_target target_name)
     target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc
index e38531a47292975c5fda5ee0931d42a53f485bdf..4c51c239f6d4449795fa38665495ab260277c84d 100644
--- a/paddle/fluid/inference/api/api_anakin_engine.cc
+++ b/paddle/fluid/inference/api/api_anakin_engine.cc
@@ -42,6 +42,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
 template <typename T, Precision P, OpRunType R>
 void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
 }
 template <typename T, Precision P, OpRunType R>
@@ -89,7 +90,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
   this->InitNet();
 }
 template <typename T, Precision P, OpRunType R>
-void PaddleInferenceAnakinPredictor<T, P, R>::Predict() {
+void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
   anakin::TargetWrapper<T>::device_sync();
   this->executor_p_->prediction();
   anakin::TargetWrapper<T>::device_sync();
@@ -99,7 +100,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   if (this->config_.re_allocable) {
-    return this->RunImpl(inputs, output_data);
+    return this->RunImpl(inputs, output_data, batch_size);
   } else {
     // Run inputs data that exceeds batch size in batches.
     // 1. Reassign the batch size.
@@ -194,7 +195,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
 template <typename T, Precision P, OpRunType R>
 bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data) {
+    std::vector<PaddleTensor> *output_data, int batch_size) {
   anakin::TargetWrapper<T>::set_device(this->config_.device_id);
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
@@ -207,12 +208,12 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
       LOG(FATAL) << " input " << input.name
                  << "'s shape size should be equal to that of net";
     }
+#ifndef ANAKIN_MLU_PLACE
     int sum = 1;
     for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
     if (sum > net_shape.count()) {
       if (this->config_.re_allocable) {
         this->graph_p_->Reshape(input.name, input.shape);
-        delete this->executor_p_;
         this->InitNet();
         d_tensor_p = this->executor_p_->get_in(input.name);
       } else {
@@ -221,6 +222,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
                       "memory.";
       }
     }
+#endif
     std::vector<int> tmp_shape;
     for (auto s : input.shape) {
       tmp_shape.push_back(s);
     }
@@ -229,8 +231,9 @@
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
                  tmp_shape);
+#ifndef ANAKIN_MLU_PLACE
     d_tensor_p->reshape(tmp_shape);
-
+#endif
     if (input.lod.size() > 0) {
       if (input.lod.size() > 1) {
         LOG(FATAL) << " input lod first dim should <=1, but you set "
@@ -246,9 +249,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     }
     d_tensor_p->copy_from(h_tensor);
   }
-  this->Predict();
+  this->Predict(batch_size);
   if (output_data->empty()) {
-    LOG(FATAL) << "At least one output should be set with tensors' names.";
+    LOG(FATAL) << "The output param in the Run function is incorrect.";
   }
   for (auto &output : *output_data) {
     if (std::find(this->output_names_.begin(), this->output_names_.end(),
@@ -256,14 +259,18 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
       LOG(FATAL) << output.name << " is not in the outputs of the graph.";
     }
     auto *d_tensor_p = this->executor_p_->get_out(output.name);
-    output.shape = d_tensor_p->valid_shape();
-    if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) {
-      output.data.Resize(d_tensor_p->valid_size() * sizeof(float));
+    auto tmp_shape = d_tensor_p->valid_shape();
+#ifdef ANAKIN_MLU_PLACE
+    tmp_shape.set_num(batch_size);
+#endif
+    output.shape = tmp_shape;
+    if (output.data.length() < tmp_shape.count() * sizeof(float)) {
+      output.data.Resize(tmp_shape.count() * sizeof(float));
     }
     auto *data = static_cast<float *>(output.data.data());
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
-                 d_tensor_p->valid_shape());
+                 tmp_shape);
     h_tensor.copy_from(*d_tensor_p);
   }
   return true;
@@ -317,6 +324,8 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
                              this->config_.compute_stream_id);
   this->ctx_p_->set_model_parallel(this->config_.model_parallel);
   this->ctx_p_->set_fusion(this->config_.op_fuse);
+  this->ctx_p_->enable_batch_changable();
+  this->ctx_p_->enable_channel_duplicate();
 }
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
@@ -327,14 +336,13 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinMLUPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
-  this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
+void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
+  this->executor_p_->fusion_prediction(batch_size);
 }
 #endif
@@ -353,14 +361,13 @@ void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinBMPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::BM>::device_sync();
+void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
   this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::BM>::device_sync();
 }
 #endif
diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h
index 88d3325b18a1570c00e27e3d09b362a3f776e949..97fc00610e05d4362d705a13a45ee6a3e5d39ffe 100644
--- a/paddle/fluid/inference/api/api_anakin_engine.h
+++ b/paddle/fluid/inference/api/api_anakin_engine.h
@@ -73,7 +73,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   virtual void OptimizeGraph();
   virtual void InitNet();
   virtual void SetContext();
-  virtual void Predict();
+  virtual void Predict(int batch_size);
   virtual std::unique_ptr<PaddlePredictor> New();
   static std::mutex mutex_;
   AnakinConfig config_;
@@ -85,7 +85,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  private:
   bool RunImpl(const std::vector<PaddleTensor>& inputs,
-               std::vector<PaddleTensor>* output_data);
+               std::vector<PaddleTensor>* output_data, int batch_size = -1);
   static std::once_flag init_anakin_;
 };
@@ -103,7 +103,7 @@ class PaddleInferenceAnakinMLUPredictor final
   void SetContext() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
@@ -120,7 +120,7 @@ class PaddleInferenceAnakinBMPredictor final
   std::unique_ptr<PaddlePredictor> New() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
 }  // namespace paddle
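Net effect on the public API: the `batch_size` argument that `Run()` already accepted is now threaded through `RunImpl()` into the virtual `Predict(int batch_size)`, so the MLU backend can pass it to `fusion_prediction()` and stamp it into the output shape via `set_num()`; `RunImpl()` keeps a default of `-1`, so existing callers are unaffected. A minimal caller-side sketch follows. The factory call, the include paths, and every config field and tensor name except `device_id`/`re_allocable` (which this patch references) are illustrative assumptions, not part of this change.

```cpp
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"  // assumed umbrella header

int main() {
  // Hypothetical configuration; only device_id and re_allocable appear in this patch.
  paddle::contrib::AnakinConfig config;
  config.device_id = 0;
  config.re_allocable = false;  // fixed device memory -> batched Run() path

  // Factory call assumed from the existing Paddle inference API.
  auto predictor =
      paddle::CreatePaddlePredictor<paddle::contrib::AnakinConfig>(config);

  // One FLOAT32 input; RunImpl() rejects any other dtype.
  std::vector<float> buf(1 * 3 * 224 * 224, 0.f);
  paddle::PaddleTensor input;
  input.name = "input_0";  // hypothetical, must match a graph input name
  input.shape = {1, 3, 224, 224};
  input.dtype = paddle::PaddleDType::FLOAT32;
  input.data = paddle::PaddleBuf(buf.data(), buf.size() * sizeof(float));

  // Output tensors must be pre-named: RunImpl() now fails with
  // "The output param in the Run function is incorrect." when the list is empty.
  paddle::PaddleTensor prob;
  prob.name = "prob_out";  // hypothetical, must match a graph output name
  std::vector<paddle::PaddleTensor> outputs{prob};

  // batch_size is forwarded to Predict(batch_size); on MLU it also sets the
  // num dimension of the returned output shape.
  predictor->Run({input}, &outputs, /*batch_size=*/1);
  return 0;
}
```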