update the initialization of anakin subgraph (#17880)

test=develop

update the initialization of anakin subgraph (#17880)
test=develop
d008260f · 石晓伟 · GitHub · ae576f3c · d008260f · d008260f
3 changed files
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -32,18 +32,26 @@ namespace paddle {
 namespace inference {
 namespace anakin {
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+extern std::once_flag
+    AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
    bool need_summary, int device, int max_batch_size,
    std::map<std::string, std::vector<int>> max_input_shape,
    std::vector<std::string> program_inputs, bool auto_config_layout)
-    : graph_(new AnakinGraphT<TargetT, PrecisionType>()),
+    : device_(device),
-      net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
+      max_batch_size_(max_batch_size),
-  device_ = device;
+      max_input_shape_(max_input_shape),
-  max_batch_size_ = max_batch_size;
+      program_inputs_(program_inputs),
-  max_input_shape_ = max_input_shape;
+      auto_config_layout_(auto_config_layout) {
-  program_inputs_ = program_inputs;
+  std::call_once(init_anakin_, [this]() {
-  auto_config_layout_ = auto_config_layout;
+    ::anakin::TargetWrapper<TargetT>::set_device(device_);
+    ::anakin::Env<TargetT>::env_init();
+  });
+  graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
+  net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
 }
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
@@ -102,7 +110,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
      anakin_input = net_->get_in(input.first);
    }
    anakin_input->reshape(fluid_input_shape);
-    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
+    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
                                                       fluid_input_shape);
    anakin_input->copy_from(tmp_anakin_tensor);
  }

--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -114,12 +114,13 @@ class AnakinEngine {
 private:
  bool initialized_{false};
+  int device_;
  int max_batch_size_;
  std::map<std::string, std::vector<int>> max_input_shape_;
-  int device_;
+  std::vector<std::string> program_inputs_;
  std::unique_ptr<GraphT> graph_;
  std::unique_ptr<NetT> net_;
-  std::vector<std::string> program_inputs_;
+  static std::once_flag init_anakin_;
  std::unordered_map<std::string, float> tensor_scales_;
  // Always be false in gpu mode but true in most cpu cases.
  bool auto_config_layout_;

--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -371,6 +371,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
  // Get the GPU memory details and calculate the fraction of memory for the
  // GPU memory pool.
  size_t gpu_used, gpu_available;
+  platform::SetDeviceId(device_id_);
  platform::GpuMemoryUsage(&gpu_used, &gpu_available);
  double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
  float fraction_of_gpu_memory =