Unverified commit 02e5c4be authored by ronnywang, committed by GitHub

[CustomDevice] add inference support (#42036)

Parent 83a4b26a
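For orientation, a minimal sketch of how the API added in this commit is meant to be used (not part of the diff; the model path and the plugin device type "my_custom_device" are hypothetical placeholders, and the public header name is assumed):

// Build with -DWITH_CUSTOM_DEVICE=ON; "my_custom_device" must match a
// registered plugin device type.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("./model_dir");        // hypothetical model path
  config.EnableCustomDevice("my_custom_device", 0);  // new API from this commit
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}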
@@ -109,7 +109,11 @@ endif()
set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference)
if(NOT APPLE AND NOT WIN32)
# TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
-set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+if (WITH_CUSTOM_DEVICE)
+  set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map")
+else()
+  set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+endif()
set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
# check symbol hidden
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
......
@@ -158,6 +158,19 @@ void AnalysisConfig::EnableNpu(int device_id) {
Update();
}
void AnalysisConfig::EnableCustomDevice(const std::string &device_type,
int device_id) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
use_custom_device_ = true;
custom_device_id_ = device_id;
custom_device_type_ = device_type;
#else
LOG(ERROR) << "Please compile with CustomDevice to use EnableCustomDevice()";
use_custom_device_ = false;
#endif
Update();
}
void AnalysisConfig::EnableIpu(int ipu_device_num, int ipu_micro_batch_size,
bool ipu_enable_pipelining,
int ipu_batches_per_step) {
@@ -324,6 +337,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// fleet exe related
CP_MEMBER(dist_config_);
// custom device related.
CP_MEMBER(use_custom_device_);
CP_MEMBER(custom_device_type_);
CP_MEMBER(custom_device_id_);
if (use_gpu_) {
PADDLE_ENFORCE_EQ(use_xpu_, false,
platform::errors::InvalidArgument(
@@ -539,7 +557,8 @@ void AnalysisConfig::Update() {
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
((use_xpu() ^ pass_builder_->use_xpu())) ||
((use_npu() ^ pass_builder_->use_npu())) ||
-((use_ipu() ^ pass_builder_->use_ipu()))) {
+((use_ipu() ^ pass_builder_->use_ipu())) ||
+((use_custom_device() ^ pass_builder_->use_custom_device()))) {
if (use_gpu()) {
pass_builder_.reset(new GpuPassStrategy);
@@ -562,6 +581,12 @@ void AnalysisConfig::Update() {
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and NPU."));
pass_builder_.reset(new NpuPassStrategy);
} else if (use_custom_device()) {
PADDLE_ENFORCE_EQ(
use_gpu(), false,
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and CustomDevice."));
pass_builder_.reset(new CustomDevicePassStrategy);
} else {
pass_builder_.reset(new CpuPassStrategy);
}
@@ -588,6 +613,13 @@ void AnalysisConfig::Update() {
"Only one choice can be made between GPU and NPU."));
pass_builder_.reset(new NpuPassStrategy(
*static_cast<NpuPassStrategy *>(pass_builder_.get())));
} else if (use_custom_device()) {
PADDLE_ENFORCE_EQ(
use_gpu(), false,
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and CustomDevice."));
pass_builder_.reset(new CustomDevicePassStrategy(
*static_cast<CustomDevicePassStrategy *>(pass_builder_.get())));
} else {
pass_builder_.reset(new CpuPassStrategy(
*static_cast<CpuPassStrategy *>(pass_builder_.get())));
@@ -733,7 +765,13 @@ void AnalysisConfig::Update() {
"but did not have the option -DWITH_IPU compiled."));
#endif
}
if (use_custom_device_) {
#ifndef PADDLE_WITH_CUSTOM_DEVICE
PADDLE_THROW(platform::errors::Unavailable(
"You tried to enable the custom device "
"but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
#endif
}
if (ir_debug_) {
pass_builder()->TurnOnDebug();
}
......
@@ -332,6 +332,15 @@ bool AnalysisPredictor::CreateExecutor() {
PADDLE_THROW(platform::errors::Unavailable(
"You tried to use IPU forward propagation, but Paddle was not compiled "
"with WITH_IPU."));
#endif
} else if (config_.use_custom_device()) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
place_ = paddle::platform::CustomPlace(config_.custom_device_type(),
                                       config_.custom_device_id());
#else
PADDLE_THROW(platform::errors::Unavailable(
"You tried to use CustomDevice forward propagation, but Paddle was not "
"compiled with WITH_CUSTOM_DEVICE."));
#endif
} else {
place_ = paddle::platform::CPUPlace();
@@ -1241,6 +1250,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
} else if (platform::is_npu_place(place_)) {
auto npu_place = place_;
res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
} else if (platform::is_custom_place(place_)) {
auto custom_place = place_;
auto paddleplace = static_cast<PaddlePlace>(
static_cast<size_t>(PaddlePlace::kCUSTOM) +
phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
res->SetPlace(paddleplace, custom_place.GetDeviceId());
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
@@ -1290,6 +1305,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
} else if (platform::is_npu_place(place_)) {
auto npu_place = place_;
res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
} else if (platform::is_custom_place(place_)) {
auto custom_place = place_;
auto paddleplace = static_cast<PaddlePlace>(
static_cast<size_t>(PaddlePlace::kCUSTOM) +
phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
res->SetPlace(paddleplace, custom_place.GetDeviceId());
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
......
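The two hunks above encode a custom place into PaddlePlace by offsetting the registered device-type id past kCUSTOM, and Tensor::CopyFromCpu below recovers the id by subtraction. A self-contained sketch of that round trip, assuming (as GetOrRegisterGlobalDeviceTypeId suggests) dense type ids starting at 0:

#include <cassert>
#include <cstddef>

enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };

// Encode, as in GetInputTensor()/GetOutputTensor(): kCUSTOM + type id.
PlaceType EncodeCustomPlace(size_t device_type_id) {
  return static_cast<PlaceType>(static_cast<size_t>(PlaceType::kCUSTOM) +
                                device_type_id);
}

// Decode, as in Tensor::CopyFromCpu(): place - kCUSTOM.
size_t DecodeCustomPlace(PlaceType place) {
  return static_cast<size_t>(place) - static_cast<size_t>(PlaceType::kCUSTOM);
}

int main() {
  assert(DecodeCustomPlace(EncodeCustomPlace(2)) == 2);
  return 0;
}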
@@ -224,8 +224,23 @@ void Tensor::CopyFromCpu(const T *data) {
"with NPU."));
#endif
} else {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
auto device_type_id =
static_cast<size_t>(place_) - static_cast<size_t>(PlaceType::kCUSTOM);
paddle::platform::DeviceContextPool &pool =
paddle::platform::DeviceContextPool::Instance();
paddle::platform::CustomPlace custom_place(
phi::GetGlobalDeviceType(device_type_id), device_);
auto *t_data = tensor->mutable_data<T>(custom_place);
auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
pool.Get(custom_place));
paddle::memory::Copy(custom_place, static_cast<void *>(t_data),
paddle::platform::CPUPlace(), data, ele_size,
dev_ctx->stream());
#else
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"The analysis predictor supports CPU, GPU, NPU and XPU now; compile "
"with WITH_CUSTOM_DEVICE to enable custom device support."));
#endif
}
}
@@ -398,8 +413,20 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
"with NPU."));
#endif
} else {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
paddle::platform::DeviceContextPool &pool =
paddle::platform::DeviceContextPool::Instance();
auto custom_place = t_place;
auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
pool.Get(custom_place));
paddle::memory::Copy(paddle::platform::CPUPlace(),
static_cast<void *>(data), custom_place, t_data,
ele_num * sizeof(T), dev_ctx->stream());
// TODO(wangran16): sync_stream
#else
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"The analysis predictor supports CPU, GPU, NPU and XPU now; compile "
"with WITH_CUSTOM_DEVICE to enable custom device support."));
#endif
}
}
......
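At the user level, the copy paths added above are exercised through the zero-copy tensor API; a hedged sketch (the input name "x" and the shape are placeholders, and the header name is assumed):

#include <vector>
#include "paddle_inference_api.h"  // assumed public header

void RunOnce(paddle_infer::Predictor *predictor) {
  std::vector<float> input(1 * 3 * 224 * 224, 1.0f);
  auto x = predictor->GetInputHandle("x");  // "x" is a placeholder name
  x->Reshape({1, 3, 224, 224});
  x->CopyFromCpu(input.data());  // host -> custom device via dev_ctx->stream()

  predictor->Run();

  auto out = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  int numel = 1;
  for (int d : out->shape()) numel *= d;
  std::vector<float> output(numel);
  out->CopyToCpu(output.data());  // custom device -> host
}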
@@ -332,6 +332,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableNpu(int device_id = 0);
///
/// \brief Turn on CustomDevice.
///
/// \param device_type The type of the custom device to use.
///
/// \param device_id The id of the custom device to use (default is 0).
///
void EnableCustomDevice(const std::string& device_type, int device_id = 0);
///
/// \brief Turn on ONNXRuntime.
///
void EnableONNXRuntime();
@@ -366,6 +374,11 @@ struct PD_INFER_DECL AnalysisConfig {
/// \return bool Whether the IPU is turned on.
///
bool use_ipu() const { return use_ipu_; }
///
/// \brief A boolean state telling whether the CustomDevice is turned on.
///
/// \return bool Whether the CustomDevice is turned on.
///
bool use_custom_device() const { return use_custom_device_; }
///
/// \brief A boolean state telling whether the ONNXRuntime is turned on.
///
@@ -403,6 +416,17 @@ struct PD_INFER_DECL AnalysisConfig {
///
int ipu_device_num() const { return ipu_device_num_; }
///
/// \brief Get the custom device id.
///
/// \return int The custom device id.
///
int custom_device_id() const { return custom_device_id_; }
///
/// \brief Get the custom device type.
///
/// \return string The custom device type.
///
std::string custom_device_type() const { return custom_device_type_; }
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \return int The initial size in MB of the GPU memory pool.
@@ -900,6 +924,11 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_npu_{false};
int npu_device_id_{0};
// CustomDevice related
bool use_custom_device_{false};
int custom_device_id_{0};
std::string custom_device_type_;
// ONNXRuntime related
bool use_onnxruntime_{false};
bool enable_ort_optimization_{false};
......
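Taken together, the AnalysisConfig additions in this header pair the new setter with its getters; a small sketch of the introspection surface (header name and device type are again assumptions):

#include <cassert>
#include "paddle_inference_api.h"  // assumed public header

void ConfigureCustomDevice(paddle_infer::Config *config) {
  config->EnableCustomDevice("my_custom_device", /*device_id=*/0);
  assert(config->use_custom_device());
  assert(config->custom_device_id() == 0);
  assert(config->custom_device_type() == "my_custom_device");
}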
@@ -166,6 +166,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \return A bool variable implying whether we are in ipu mode.
bool use_ipu() const { return use_ipu_; }
/// \brief Check if we are using CustomDevice.
/// \return A bool variable implying whether we are in CustomDevice mode.
bool use_custom_device() const { return use_custom_device_; }
/// \brief Default destructor.
virtual ~PassStrategy() = default;
@@ -177,6 +181,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
bool use_ipu_{false};
bool use_mkldnn_{false};
bool use_gpu_fp16_{false};
bool use_custom_device_{false};
/// \endcond
};
@@ -291,6 +296,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
}
};
/// \class CustomDevicePassStrategy
/// \brief The CustomDevice passes controller, it is used in AnalysisPredictor
/// with CustomDevice mode.
class PD_INFER_DECL CustomDevicePassStrategy final : public PassStrategy {
public:
CustomDevicePassStrategy() : PassStrategy({}) { use_custom_device_ = true; }
/// \brief Construct by copying another CustomDevicePassStrategy object.
/// \param[in] other The CustomDevicePassStrategy object we want to copy.
explicit CustomDevicePassStrategy(const CustomDevicePassStrategy &other)
: PassStrategy(other.AllPasses()) {
use_custom_device_ = true;
}
};
/// \class IpuPassStrategy
/// \brief The IPU passes controller, it is used in AnalysisPredictor with IPU
/// mode.
......
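As with NpuPassStrategy, copying a CustomDevicePassStrategy carries over the accumulated pass list and re-asserts the device flag; a minimal sketch (include path assumed):

#include <cassert>
#include "paddle/fluid/inference/api/paddle_pass_builder.h"  // assumed path

void CheckCopy() {
  paddle::CustomDevicePassStrategy strategy;
  paddle::CustomDevicePassStrategy copy(strategy);
  assert(copy.use_custom_device());  // flag survives the copy
}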
@@ -54,7 +54,7 @@ enum DataType {
// TODO(Superjomn) support more data types if needed.
};
-enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU };
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
......
{
global:
*paddle*;
*Pass*;
*profile*;
*phi*;
*FLAGS_*;
local:
*;
};