Unverified commit 25d80791 authored by 石晓伟, committed by GitHub

Fix Bitmain Predictor::Clone() (#18599)

* update anakin-engine interfaces for content-dnn

test=develop

* support GPU-only mode of Anakin

modify eltwise parsing

test=develop

* modifications for thread safety

test=develop

* integrate template instances

test=develop

* increase template parameters

test=develop

* support MLU predictor

test=develop

* update anakin cmake files

test=develop

* update TargetWrapper::set_device

* update the initialization of anakin subgraph

test=develop

* use the default constructor of base class

test=develop

* load model from buffer with length

test=develop

* modify the access level of the class

test=develop

* support Anakin for the Bitmain arch

test=develop

* remove files

* checkout CMakeLists

test=develop

* modify interfaces

test=develop

* add cmake dependencies

test=develop

* enforce the outputs of the net

test=develop
Parent 97549a4f
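In short, distilled from the diff below: Clone() used to rebuild the new predictor through ResetConfig()/ResetExecuter(), which always constructed a base-class PaddleInferenceAnakinPredictor, so the MLU and Bitmain (BM) subclasses lost their overridden behavior when cloned. The fix routes construction through a virtual factory New(), which each architecture-specific subclass overrides, plus a Reset() that copies the shared state. A condensed sketch of the new flow (simplified from the patch; not a verbatim excerpt):

```cpp
// Condensed from the patch below; template boilerplate trimmed.
std::unique_ptr<PaddlePredictor> Clone() {
  auto cls = this->New();  // virtual factory: dispatches to the
                           // most-derived New(), e.g. the BM override
  auto *p = dynamic_cast<PaddleInferenceAnakinPredictor<T, P, R> *>(cls.get());
  if (!p) LOG(FATAL) << "fail to call Init";
  p->Reset(this);  // copy config, graph, and io names; build a fresh
                   // context and net for the clone
  return cls;
}
```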
@@ -70,9 +70,9 @@ cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_
if(ANAKIN_FOUND)
# Do not turn warnings into errors.
set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error")
-cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc)
+cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
-cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc)
+cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash)
target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
function(anakin_target target_name)
target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
......
@@ -62,8 +62,9 @@ void PaddleInferenceAnakinPredictor<T, P, R>::InitGraph() {
} else {
LOG(FATAL) << "Model load error.";
}
-auto inputs = this->graph_p_->get_ins();
-for (auto &input_str : inputs) {
+this->input_names_ = this->graph_p_->get_ins();
+this->output_names_ = this->graph_p_->get_outs();
+for (auto &input_str : this->input_names_) {
if (this->config_.init_inputs_shape.find(input_str) ==
this->config_.init_inputs_shape.end()) {
LOG(FATAL) << input_str << " should be set in init_inputs_shape.";
@@ -201,7 +202,7 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
<< "'s type is not float";
}
auto d_tensor_p = this->executor_p_->get_in(input.name);
-auto net_shape = d_tensor_p->shape();
+auto net_shape = d_tensor_p->valid_shape();
if (net_shape.size() != input.shape.size()) {
LOG(FATAL) << " input " << input.name
<< "'s shape size should be equal to that of net";
@@ -250,6 +251,10 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
LOG(FATAL) << "At least one output should be set with tensors' names.";
}
for (auto &output : *output_data) {
+if (std::find(this->output_names_.begin(), this->output_names_.end(),
+output.name) == this->output_names_.end()) {
+LOG(FATAL) << output.name << " is not in the outputs of the graph.";
+}
auto *d_tensor_p = this->executor_p_->get_out(output.name);
output.shape = d_tensor_p->valid_shape();
if (output.data.length() < d_tensor_p->valid_size() * sizeof(float)) {
@@ -264,20 +269,23 @@ bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
return true;
}
template <typename T, Precision P, OpRunType R>
-bool PaddleInferenceAnakinPredictor<T, P, R>::ResetConfig(
-const AnakinConfig &config) {
-this->config_ = config;
-return true;
-}
-template <typename T, Precision P, OpRunType R>
-anakin::Net<T, P, R> &PaddleInferenceAnakinPredictor<T, P, R>::ResetExecuter(
-std::shared_ptr<anakin::graph::Graph<T, P>> graph_p) {
-this->graph_p_ = graph_p;
+bool PaddleInferenceAnakinPredictor<T, P, R>::Reset(
+PaddleInferenceAnakinPredictor<T, P, R> *predictor) {
+this->config_ = predictor->GetConfig();
+this->graph_p_ = predictor->GetGraph();
+this->input_names_ = predictor->GetInputNames();
+this->output_names_ = predictor->GetOutputNames();
this->ctx_p_ = std::make_shared<anakin::Context<T>>(
this->config_.device_id, this->config_.data_stream_id,
this->config_.compute_stream_id);
this->InitNet();
-return *this->executor_p_;
+return true;
}
+template <typename T, Precision P, OpRunType R>
+std::unique_ptr<PaddlePredictor>
+PaddleInferenceAnakinPredictor<T, P, R>::New() {
+return std::unique_ptr<PaddlePredictor>(
+new PaddleInferenceAnakinPredictor<T, P, R>());
+}
// the cloned new Predictor of anakin share the same net weights from original
// Predictor
@@ -285,21 +293,24 @@ template <typename T, Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<T, P, R>::Clone() {
VLOG(3) << "Anakin Predictor::clone";
-std::unique_ptr<PaddlePredictor> cls(
-new PaddleInferenceAnakinPredictor<T, P, R>());
-// construct executer from other graph
+std::unique_ptr<PaddlePredictor> cls = std::move(this->New());
auto anakin_predictor_p =
dynamic_cast<PaddleInferenceAnakinPredictor<T, P, R> *>(cls.get());
if (!anakin_predictor_p) {
LOG(FATAL) << "fail to call Init";
}
-anakin_predictor_p->ResetConfig(this->config_);
-anakin_predictor_p->ResetExecuter(this->graph_p_);
+anakin_predictor_p->Reset(this);
return cls;
}
#ifdef ANAKIN_MLU_PLACE
template <Precision P, OpRunType R>
+std::unique_ptr<PaddlePredictor>
+PaddleInferenceAnakinMLUPredictor<P, R>::New() {
+return std::unique_ptr<PaddlePredictor>(
+new PaddleInferenceAnakinMLUPredictor<P, R>());
+}
+template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
this->ctx_p_ = std::make_shared<anakin::Context<anakin::MLU>>(
this->config_.device_id, this->config_.data_stream_id,
@@ -329,6 +340,11 @@ void PaddleInferenceAnakinMLUPredictor<P, R>::Predict() {
#ifdef ANAKIN_BM_PLACE
template <Precision P, OpRunType R>
+std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinBMPredictor<P, R>::New() {
+return std::unique_ptr<PaddlePredictor>(
+new PaddleInferenceAnakinBMPredictor<P, R>());
+}
+template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
if (!this->graph_p_->fusion_optimize()) {
LOG(FATAL) << "Graph optimization error.";
......
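The New()/Clone()/Reset() arrangement above is the classic virtual-constructor idiom: the base class implements Clone() once, and each subclass only supplies a factory that returns its own type. A minimal self-contained illustration of the pattern (class names here are placeholders, not from the patch):

```cpp
#include <iostream>
#include <memory>

struct Predictor {
  virtual ~Predictor() = default;
  // Each subclass overrides New() to return an instance of its own type.
  virtual std::unique_ptr<Predictor> New() const {
    return std::make_unique<Predictor>();
  }
  std::unique_ptr<Predictor> Clone() const {
    auto p = New();  // dispatches to the most-derived New()
    // ... copy shared state into *p here ...
    return p;
  }
};

struct BMPredictor : Predictor {
  std::unique_ptr<Predictor> New() const override {
    return std::make_unique<BMPredictor>();
  }
};

int main() {
  BMPredictor original;
  auto clone = original.Clone();
  // Prints 1: the clone really is a BMPredictor, not a base Predictor.
  std::cout << (dynamic_cast<BMPredictor *>(clone.get()) != nullptr) << "\n";
}
```

Before this patch, Clone() hard-coded `new PaddleInferenceAnakinPredictor<T, P, R>()`, which is exactly the lost-derived-type failure mode this idiom avoids.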
@@ -20,6 +20,7 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
+#include <vector>
#include "framework/core/net/net.h"
@@ -51,10 +52,18 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
int batch_size = -1) override;
std::unique_ptr<PaddlePredictor> Clone() override;
-virtual bool ResetConfig(const AnakinConfig& config);
-virtual anakin::Net<T, P, R>& ResetExecuter(
-std::shared_ptr<anakin::graph::Graph<T, P>> graph_p);
+bool Reset(PaddleInferenceAnakinPredictor<T, P, R>* predictor);
void InitPredictor();
+std::shared_ptr<anakin::graph::Graph<T, P>> GetGraph() {
+return this->graph_p_;
+}
+std::vector<std::string> GetInputNames() override {
+return this->input_names_;
+}
+std::vector<std::string> GetOutputNames() override {
+return this->output_names_;
+}
+const AnakinConfig& GetConfig() const { return this->config_; }
~PaddleInferenceAnakinPredictor() override;
@@ -65,11 +74,14 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
virtual void InitNet();
virtual void SetContext();
virtual void Predict();
+virtual std::unique_ptr<PaddlePredictor> New();
static std::mutex mutex_;
AnakinConfig config_;
std::shared_ptr<anakin::Context<T>> ctx_p_;
std::shared_ptr<anakin::graph::Graph<T, P>> graph_p_;
anakin::Net<T, P, R>* executor_p_{nullptr};
+std::vector<std::string> input_names_;
+std::vector<std::string> output_names_;
private:
bool RunImpl(const std::vector<PaddleTensor>& inputs,
@@ -82,10 +94,12 @@ template <Precision P, OpRunType R>
class PaddleInferenceAnakinMLUPredictor final
: public PaddleInferenceAnakinPredictor<anakin::MLU, P, R> {
public:
+PaddleInferenceAnakinMLUPredictor() = default;
explicit PaddleInferenceAnakinMLUPredictor(const AnakinConfig& config) {
-this->ResetConfig(config);
+this->config_ = config;
this->InitPredictor();
}
+std::unique_ptr<PaddlePredictor> New() override;
void SetContext() override;
void OptimizeGraph() override;
void InitNet() override;
@@ -98,10 +112,12 @@ template <Precision P, OpRunType R>
class PaddleInferenceAnakinBMPredictor final
: public PaddleInferenceAnakinPredictor<anakin::BM, P, R> {
public:
+PaddleInferenceAnakinBMPredictor() = default;
explicit PaddleInferenceAnakinBMPredictor(const AnakinConfig& config) {
-this->ResetConfig(config);
+this->config_ = config;
this->InitPredictor();
}
+std::unique_ptr<PaddlePredictor> New() override;
void OptimizeGraph() override;
void InitNet() override;
void Predict() override;
......
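For context, a usage sketch of the cloning API (illustrative only: the header path and entry point are assumptions based on the public Paddle inference API, not part of this patch). Each Clone() shares the net weights with the original predictor but gets its own executor and context via Reset(), so worker threads can run concurrently:

```cpp
#include <thread>
#include <vector>
#include "paddle_inference_api.h"  // assumed public header; path may differ

// `main_predictor` is an already-initialized Anakin predictor.
void RunTwoWorkers(paddle::PaddlePredictor *main_predictor) {
  auto worker = [main_predictor]() {
    // Clone() shares graph weights but builds a fresh executor/context
    // (see Reset() in the diff), so each thread runs independently.
    auto predictor = main_predictor->Clone();
    std::vector<paddle::PaddleTensor> inputs, outputs;
    // ... fill `inputs`; names and shapes must match init_inputs_shape ...
    predictor->Run(inputs, &outputs);
  };
  std::thread t1(worker), t2(worker);
  t1.join();
  t2.join();
}
```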