diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
index 529a859458a9884a53778e7133ab121ed582a3fb..98bbeddd76c1f7f75c3c89ef0ac3860334104964 100644
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -32,18 +32,26 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+extern std::once_flag
+    AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;
+
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
     bool need_summary, int device, int max_batch_size,
     std::map<std::string, std::vector<int>> max_input_shape,
     std::vector<std::string> program_inputs, bool auto_config_layout)
-    : graph_(new AnakinGraphT<TargetT, PrecisionType>()),
-      net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
-  device_ = device;
-  max_batch_size_ = max_batch_size;
-  max_input_shape_ = max_input_shape;
-  program_inputs_ = program_inputs;
-  auto_config_layout_ = auto_config_layout;
+    : device_(device),
+      max_batch_size_(max_batch_size),
+      max_input_shape_(max_input_shape),
+      program_inputs_(program_inputs),
+      auto_config_layout_(auto_config_layout) {
+  std::call_once(init_anakin_, [this]() {
+    ::anakin::TargetWrapper<TargetT>::set_device(device_);
+    ::anakin::Env<TargetT>::env_init();
+  });
+  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
+  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
 }
 
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
@@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
       anakin_input = net_->get_in(input.first);
     }
     anakin_input->reshape(fluid_input_shape);
-    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
+    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
                                                        fluid_input_shape);
     anakin_input->copy_from(tmp_anakin_tensor);
   }
diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h
index fb40f56511ba255413d422f156f4265102616d03..04ac000e1ec4ea1b2fdf6153f724f73e7ea3cc88 100644
--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -114,12 +114,13 @@ class AnakinEngine {
 
  private:
   bool initialized_{false};
+  int device_;
   int max_batch_size_;
   std::map<std::string, std::vector<int>> max_input_shape_;
-  int device_;
+  std::vector<std::string> program_inputs_;
   std::unique_ptr<GraphT> graph_;
   std::unique_ptr<NetT> net_;
-  std::vector<std::string> program_inputs_;
+  static std::once_flag init_anakin_;
   std::unordered_map<std::string, float> tensor_scales_;
   // Always be false in gpu mode but true in most cpu cases.
   bool auto_config_layout_;
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 2dc96c871517a0064faf97d47b48852d633f1739..890c90697bcd52e10560b04981cc50d7b58b2d6e 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -371,6 +371,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
   // Get the GPU memory details and calculate the fraction of memory for the
   // GPU memory pool.
   size_t gpu_used, gpu_available;
+  platform::SetDeviceId(device_id_);
   platform::GpuMemoryUsage(&gpu_used, &gpu_available);
   double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
   float fraction_of_gpu_memory =
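
Note on the engine.cc hunk: the constructor now routes process-wide Anakin setup
through a static std::once_flag, so env_init() runs exactly once no matter how
many engines are constructed or from how many threads. Below is a minimal,
self-contained sketch of that std::call_once pattern, not code from this patch;
Engine, SetDevice, and EnvInit are hypothetical stand-ins for the Anakin types.

    #include <iostream>
    #include <mutex>
    #include <thread>
    #include <vector>

    void SetDevice(int id) { std::cout << "set_device(" << id << ")\n"; }
    void EnvInit() { std::cout << "env_init() -- runs once\n"; }

    class Engine {
     public:
      explicit Engine(int device) : device_(device) {
        // Every instance funnels one-time setup through the shared flag;
        // only the first constructor to arrive executes the lambda.
        std::call_once(init_flag_, [this]() {
          SetDevice(device_);  // bind the initializing thread's device first
          EnvInit();           // process-wide init, guarded by the flag
        });
      }

     private:
      int device_;
      static std::once_flag init_flag_;
    };

    std::once_flag Engine::init_flag_;

    int main() {
      // Construct engines from several threads; EnvInit() still fires once.
      std::vector<std::thread> workers;
      for (int dev = 0; dev < 4; ++dev) {
        workers.emplace_back([dev] { Engine e(dev); });
      }
      for (auto& t : workers) t.join();
    }

Because set_device only runs inside the call_once, later engines on other GPUs
are not rebound at construction, which is why the BindInput hunk passes device_
into the saber::Tensor explicitly instead of the hard-coded 0.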
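
Note on the analysis_config.cc hunk: GPU memory queries report numbers for the
caller's currently selected device only, so the device the config targets must
be selected before asking, or device 0 is measured by mistake. A hedged sketch
of the same idea against the raw CUDA runtime API (rather than the
paddle::platform wrappers) follows; device_id is a hypothetical value.

    #include <cstdio>
    #include <cuda_runtime_api.h>

    int main() {
      int device_id = 1;       // hypothetical: the device the config targets
      cudaSetDevice(device_id);  // without this, device 0 would be queried
      size_t free_bytes = 0, total_bytes = 0;
      cudaMemGetInfo(&free_bytes, &total_bytes);  // per current device
      std::printf("device %d: used = %zu MB, available = %zu MB\n", device_id,
                  (total_bytes - free_bytes) >> 20, free_bytes >> 20);
      return 0;
    }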