Unverified commit 71b2ed61, authored by 石晓伟, committed by GitHub

support MLU nums, test=develop (#19372)

Parent: e2c6bada
@@ -70,9 +70,9 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_
 if(ANAKIN_FOUND)
   # Do not turn warnings into errors.
   set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error")
-  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api anakin anakin_saber_common)
-  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
+  cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
   target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
   function(anakin_target target_name)
     target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})

@@ -42,6 +42,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
 template <typename T, Precision P, OpRunType R>
 void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
 }
 template <typename T, Precision P, OpRunType R>
@@ -89,7 +90,7 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
   this->InitNet();
 }
 template <typename T, Precision P, OpRunType R>
-void PaddleInferenceAnakinPredictor<T, P, R>::Predict() {
+void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
   anakin::TargetWrapper<T>::device_sync();
   this->executor_p_->prediction();
   anakin::TargetWrapper<T>::device_sync();
@@ -99,7 +100,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   if (this->config_.re_allocable) {
-    return this->RunImpl(inputs, output_data);
+    return this->RunImpl(inputs, output_data, batch_size);
   } else {
     // Run inputs data that exceeds batch size in batches.
     // 1. Reassign the batch size.
@@ -194,7 +195,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
 template <typename T, Precision P, OpRunType R>
 bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data) {
+    std::vector<PaddleTensor> *output_data, int batch_size) {
   anakin::TargetWrapper<T>::set_device(this->config_.device_id);
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
@@ -207,12 +208,12 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
       LOG(FATAL) << " input " << input.name
                  << "'s shape size should be equal to that of net";
     }
+#ifndef ANAKIN_MLU_PLACE
     int sum = 1;
     for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
     if (sum > net_shape.count()) {
       if (this->config_.re_allocable) {
         this->graph_p_->Reshape(input.name, input.shape);
-        delete this->executor_p_;
         this->InitNet();
         d_tensor_p = this->executor_p_->get_in(input.name);
       } else {
@@ -221,6 +222,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
                       "memory.";
       }
     }
+#endif
     std::vector<int> tmp_shape;
     for (auto s : input.shape) {
       tmp_shape.push_back(s);
@@ -229,8 +231,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
                  tmp_shape);
+#ifndef ANAKIN_MLU_PLACE
     d_tensor_p->reshape(tmp_shape);
+#endif
     if (input.lod.size() > 0) {
       if (input.lod.size() > 1) {
         LOG(FATAL) << " input lod first dim should <=1, but you set "
@@ -246,9 +249,9 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
     }
     d_tensor_p->copy_from(h_tensor);
   }
-  this->Predict();
+  this->Predict(batch_size);
   if (output_data->empty()) {
-    LOG(FATAL) << "At least one output should be set with tensors' names.";
+    LOG(FATAL) << "The output param in the Run function is incorrect.";
   }
   for (auto &output : *output_data) {
     if (std::find(this->output_names_.begin(), this->output_names_.end(),
@@ -256,14 +259,18 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
       LOG(FATAL) << output.name << " is not in the outputs of the graph.";
     }
     auto *d_tensor_p = this->executor_p_->get_out(output.name);
-    output.shape = d_tensor_p->valid_shape();
-    if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) {
-      output.data.Resize(d_tensor_p->valid_size() * sizeof(float));
+    auto tmp_shape = d_tensor_p->valid_shape();
+#ifdef ANAKIN_MLU_PLACE
+    tmp_shape.set_num(batch_size);
+#endif
+    output.shape = tmp_shape;
+    if (output.data.length() < tmp_shape.count() * sizeof(float)) {
+      output.data.Resize(tmp_shape.count() * sizeof(float));
     }
     auto *data = static_cast<float *>(output.data.data());
     anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
         h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
-                 d_tensor_p->valid_shape());
+                 tmp_shape);
     h_tensor.copy_from(*d_tensor_p);
   }
   return true;
@@ -317,6 +324,8 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
       this->config_.compute_stream_id);
   this->ctx_p_->set_model_parallel(this->config_.model_parallel);
   this->ctx_p_->set_fusion(this->config_.op_fuse);
+  this->ctx_p_->enable_batch_changable();
+  this->ctx_p_->enable_channel_duplicate();
 }
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
@@ -327,14 +336,13 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinMLUPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
-  this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::MLU>::device_sync();
+void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
+  this->executor_p_->fusion_prediction(batch_size);
 }
 #endif
@@ -353,14 +361,13 @@ void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
 template <Precision P, OpRunType R>
 void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
   std::unique_lock<std::mutex> lock(this->mutex_);
+  delete this->executor_p_;
   this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
   this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
 }
 template <Precision P, OpRunType R>
-void PaddleInferenceAnakinBMPredictor<P, R>::Predict() {
-  anakin::TargetWrapper<anakin::BM>::device_sync();
+void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
   this->executor_p_->fusion_prediction();
-  anakin::TargetWrapper<anakin::BM>::device_sync();
 }
 #endif
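The MLU-specific branch above sizes the host output buffer from the device tensor's compiled shape, but with its batch ("num") dimension overridden by the batch size passed into Run(). A minimal, self-contained sketch of that sizing rule follows; the Shape type and the dimension values are illustrative stand-ins, not library code:

```cpp
// Conceptual sketch only: on MLU the device output tensor keeps the shape the
// graph was compiled with, so the host buffer is sized from that shape with
// the batch ("num") dimension replaced by the batch actually run.
#include <cstddef>
#include <iostream>
#include <vector>

struct Shape {
  std::vector<int> dims;            // e.g. {num, channel, height, width}
  void set_num(int n) { dims[0] = n; }
  size_t count() const {
    size_t c = 1;
    for (int d : dims) c *= static_cast<size_t>(d);
    return c;
  }
};

int main() {
  Shape compiled{{8, 1000, 1, 1}};  // graph compiled for a max batch of 8
  int batch_size = 3;               // batch actually passed to Run()

  Shape out_shape = compiled;
  out_shape.set_num(batch_size);    // mirrors tmp_shape.set_num(batch_size)

  size_t bytes = out_shape.count() * sizeof(float);
  std::cout << "host output buffer: " << bytes << " bytes\n";  // 12000 bytes
  return 0;
}
```

Without the set_num() override, the host buffer would be sized for the compiled maximum batch (8 in this sketch) rather than the 3 samples actually run.
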
@@ -73,7 +73,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   virtual void OptimizeGraph();
   virtual void InitNet();
   virtual void SetContext();
-  virtual void Predict();
+  virtual void Predict(int batch_size);
   virtual std::unique_ptr<PaddlePredictor> New();
   static std::mutex mutex_;
   AnakinConfig config_;
@@ -85,7 +85,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  private:
   bool RunImpl(const std::vector<PaddleTensor>& inputs,
-               std::vector<PaddleTensor>* output_data);
+               std::vector<PaddleTensor>* output_data, int batch_size = -1);
   static std::once_flag init_anakin_;
 };
@@ -103,7 +103,7 @@ class PaddleInferenceAnakinMLUPredictor final
   void SetContext() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
@@ -120,7 +120,7 @@ class PaddleInferenceAnakinBMPredictor final
   std::unique_ptr<PaddlePredictor> New() override;
   void OptimizeGraph() override;
   void InitNet() override;
-  void Predict() override;
+  void Predict(int batch_size) override;
 };
 #endif
 } // namespace paddle
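Taken together, the header changes let a caller pass the actual batch size through the public Run() overload down to RunImpl() and Predict(). A hedged caller-side sketch follows; the include path, the config type and any fields beyond those visible in this diff, and the tensor names are assumptions, not part of this commit:

```cpp
// Hedged usage sketch: driving the Anakin predictor through the public
// PaddlePredictor interface with an explicit batch size.
#include <vector>
#include "paddle_inference_api.h"  // assumed header name

int main() {
  paddle::contrib::AnakinConfig config;  // assumed config type for Anakin
  config.device_id = 0;          // field used via config_.device_id above
  config.re_allocable = false;   // fixed-shape device graph (e.g. MLU)
  // ... model path and other Anakin-specific fields omitted here ...

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::contrib::AnakinConfig>(config);

  const int batch_size = 4;
  std::vector<float> input_buf(batch_size * 3 * 224 * 224, 0.f);

  paddle::PaddleTensor in;
  in.name = "input_0";  // hypothetical graph input name
  in.shape = {batch_size, 3, 224, 224};
  in.dtype = paddle::PaddleDType::FLOAT32;
  in.data = paddle::PaddleBuf(input_buf.data(),
                              input_buf.size() * sizeof(float));

  // Outputs must be pre-named; RunImpl() looks them up in output_names_.
  paddle::PaddleTensor out;
  out.name = "prob_out";  // hypothetical graph output name
  std::vector<paddle::PaddleTensor> outputs{out};

  // batch_size is now forwarded to RunImpl() and Predict() by this commit.
  predictor->Run({in}, &outputs, batch_size);
  return 0;
}
```

With re_allocable left at its default, Run() forwards straight to RunImpl() as shown in the implementation diff above; with it disabled, inputs larger than the compiled batch are run in chunks.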