Unverified commit d008260f — authored by: Shi Xiaowei, committed by: GitHub

update the initialization of anakin subgraph (#17880)

test=develop
Parent commit: ae576f3c
...@@ -32,18 +32,26 @@ namespace paddle { ...@@ -32,18 +32,26 @@ namespace paddle {
namespace inference { namespace inference {
namespace anakin { namespace anakin {
// Per-instantiation flag guaranteeing the Anakin runtime environment
// (device binding + Env initialization) is set up exactly once, no matter
// how many engines of this instantiation are constructed.
// NOTE(review): the out-of-line definition of a static data member must NOT
// carry `extern` -- `extern` turns this into a mere declaration and leaves
// init_anakin_ undefined at link time.
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::once_flag AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;

// Constructs an engine bound to `device`.
//
// Parameters:
//   need_summary       - forwarded to the underlying Anakin net.
//   device             - target device id; bound via TargetWrapper on first use.
//   max_batch_size     - upper bound on the runtime batch dimension.
//   max_input_shape    - per-input maximum shapes (name -> dims).
//   program_inputs     - names of the program's input tensors.
//   auto_config_layout - always false in GPU mode, true in most CPU cases
//                        (see header comment on auto_config_layout_).
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
    bool need_summary, int device, int max_batch_size,
    std::map<std::string, std::vector<int>> max_input_shape,
    std::vector<std::string> program_inputs, bool auto_config_layout)
    // Initialize members in the init-list instead of assigning in the body;
    // sink parameters are moved to avoid copying the map/vector contents.
    : device_(device),
      max_batch_size_(max_batch_size),
      max_input_shape_(std::move(max_input_shape)),
      program_inputs_(std::move(program_inputs)),
      auto_config_layout_(auto_config_layout) {
  // Bind the device and initialize the Anakin environment exactly once;
  // subsequent engine constructions reuse the already-initialized env.
  std::call_once(init_anakin_, [this]() {
    ::anakin::TargetWrapper<TargetT>::set_device(device_);
    ::anakin::Env<TargetT>::env_init();
  });
  // Create graph/net only after the environment is guaranteed ready.
  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType> template <typename TargetT, Precision PrecisionType, OpRunType RunType>
...@@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput( ...@@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
anakin_input = net_->get_in(input.first); anakin_input = net_->get_in(input.first);
} }
anakin_input->reshape(fluid_input_shape); anakin_input->reshape(fluid_input_shape);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0, ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
fluid_input_shape); fluid_input_shape);
anakin_input->copy_from(tmp_anakin_tensor); anakin_input->copy_from(tmp_anakin_tensor);
} }
......
...@@ -114,12 +114,13 @@ class AnakinEngine { ...@@ -114,12 +114,13 @@ class AnakinEngine {
private: private:
bool initialized_{false}; bool initialized_{false};
int device_;
int max_batch_size_; int max_batch_size_;
std::map<std::string, std::vector<int>> max_input_shape_; std::map<std::string, std::vector<int>> max_input_shape_;
int device_; std::vector<std::string> program_inputs_;
std::unique_ptr<GraphT> graph_; std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_; std::unique_ptr<NetT> net_;
std::vector<std::string> program_inputs_; static std::once_flag init_anakin_;
std::unordered_map<std::string, float> tensor_scales_; std::unordered_map<std::string, float> tensor_scales_;
// Always be false in gpu mode but true in most cpu cases. // Always be false in gpu mode but true in most cpu cases.
bool auto_config_layout_; bool auto_config_layout_;
......
...@@ -371,6 +371,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const { ...@@ -371,6 +371,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
// Get the GPU memory details and calculate the fraction of memory for the // Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool. // GPU memory pool.
size_t gpu_used, gpu_available; size_t gpu_used, gpu_available;
platform::SetDeviceId(device_id_);
platform::GpuMemoryUsage(&gpu_used, &gpu_available); platform::GpuMemoryUsage(&gpu_used, &gpu_available);
double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.; double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
float fraction_of_gpu_memory = float fraction_of_gpu_memory =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment.