diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 7fae481f582898f82935d2fee06127b36d724a28..633f481df808b214e2459758e2b0b69aed92be37 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -109,7 +109,11 @@ endif()
 set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference)
 if(NOT APPLE AND NOT WIN32)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not supported on Mac.
-  set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+  if (WITH_CUSTOM_DEVICE)
+    set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map")
+  else()
+    set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+  endif()
   set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
   # check symbol hidden
   FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 4827fe6c1ac97e7383e8253947a382bc495e0fe3..735e1b7be4c1fadacb9fc6fe90fb578863a5c32a 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -158,6 +158,19 @@ void AnalysisConfig::EnableNpu(int device_id) {
   Update();
 }
 
+void AnalysisConfig::EnableCustomDevice(const std::string &device_type,
+                                        int device_id) {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  use_custom_device_ = true;
+  custom_device_id_ = device_id;
+  custom_device_type_ = device_type;
+#else
+  LOG(ERROR) << "Please compile with CustomDevice to use EnableCustomDevice()";
+  use_custom_device_ = false;
+#endif
+  Update();
+}
+
 void AnalysisConfig::EnableIpu(int ipu_device_num, int ipu_micro_batch_size,
                                bool ipu_enable_pipelining,
                                int ipu_batches_per_step) {
@@ -324,6 +337,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // fleet exe related
   CP_MEMBER(dist_config_);
 
+  // custom device related.
+  CP_MEMBER(use_custom_device_);
+  CP_MEMBER(custom_device_type_);
+  CP_MEMBER(custom_device_id_);
+
   if (use_gpu_) {
     PADDLE_ENFORCE_EQ(use_xpu_, false,
                       platform::errors::InvalidArgument(
@@ -539,7 +557,8 @@ void AnalysisConfig::Update() {
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
       ((use_npu() ^ pass_builder_->use_npu())) ||
-      ((use_ipu() ^ pass_builder_->use_ipu()))) {
+      ((use_ipu() ^ pass_builder_->use_ipu())) ||
+      ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
@@ -562,6 +581,12 @@ void AnalysisConfig::Update() {
           platform::errors::InvalidArgument(
               "Only one choice can be made between GPU and NPU."));
       pass_builder_.reset(new NpuPassStrategy);
+    } else if (use_custom_device()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and CustomDevice."));
+      pass_builder_.reset(new CustomDevicePassStrategy);
     } else {
       pass_builder_.reset(new CpuPassStrategy);
     }
@@ -588,6 +613,13 @@ void AnalysisConfig::Update() {
               "Only one choice can be made between GPU and NPU."));
       pass_builder_.reset(new NpuPassStrategy(
           *static_cast<NpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_custom_device()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and CustomDevice."));
+      pass_builder_.reset(new CustomDevicePassStrategy(
+          *static_cast<CustomDevicePassStrategy *>(pass_builder_.get())));
     } else {
       pass_builder_.reset(new CpuPassStrategy(
           *static_cast<CpuPassStrategy *>(pass_builder_.get())));
@@ -733,7 +765,13 @@ void AnalysisConfig::Update() {
         "but did not have the option -DWITH_IPU compiled."));
 #endif
   }
-
+  if (use_custom_device_) {
+#ifndef PADDLE_WITH_CUSTOM_DEVICE
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to enable the custom device "
+        "but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
+#endif
+  }
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
   }
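For context, a minimal caller-side sketch of the new config API (not part of this patch; "my_device" is a hypothetical device type that must match whatever the loaded CustomDevice plugin registered, and the model path is illustrative):

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("./model_dir");  // illustrative model path
  // Selects the plugin-provided device; without -DWITH_CUSTOM_DEVICE this
  // logs an error and leaves use_custom_device_ false, as shown above.
  config.EnableCustomDevice("my_device", /*device_id=*/0);
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor ? 0 : 1;
}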
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 4f0d4a908380f5bb50c13fe05d865777490a0f0c..0d3a687c461d101b276b11dc571c5679a8217d19 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -332,6 +332,15 @@ bool AnalysisPredictor::CreateExecutor() {
     PADDLE_THROW(platform::errors::Unavailable(
         "You tried to use IPU forward propagation, but Paddle was not compiled "
         "with WITH_IPU."));
+#endif
+  } else if (config_.use_custom_device()) {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    place_ = paddle::platform::CustomPlace(config_.custom_device_type());
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use CustomDevice forward propagation, "
+        "but Paddle was not compiled "
+        "with WITH_CUSTOM_DEVICE."));
 #endif
   } else {
     place_ = paddle::platform::CPUPlace();
@@ -1241,6 +1250,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   } else if (platform::is_npu_place(place_)) {
     auto npu_place = place_;
     res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
+  } else if (platform::is_custom_place(place_)) {
+    auto custom_place = place_;
+    auto paddleplace = static_cast<PaddlePlace>(
+        static_cast<size_t>(PaddlePlace::kCUSTOM) +
+        phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
+    res->SetPlace(paddleplace, custom_place.GetDeviceId());
   } else {
     auto gpu_place = place_;
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
@@ -1290,6 +1305,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   } else if (platform::is_npu_place(place_)) {
     auto npu_place = place_;
     res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
+  } else if (platform::is_custom_place(place_)) {
+    auto custom_place = place_;
+    auto paddleplace = static_cast<PaddlePlace>(
+        static_cast<size_t>(PaddlePlace::kCUSTOM) +
+        phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
+    res->SetPlace(paddleplace, custom_place.GetDeviceId());
   } else {
     auto gpu_place = place_;
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
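The `paddleplace` arithmetic above folds custom devices into the existing place enum: each registered device type gets the value `kCUSTOM + type_id`. A round-trip sketch, under the assumption that the two phi helpers are declared in phi/common/place.h and that a plugin registered the hypothetical type "my_device" (decoding mirrors what Tensor::CopyFromCpu does in the next file):

#include "paddle/phi/common/place.h"                   // assumed header for the phi helpers
#include "paddle/fluid/inference/api/paddle_tensor.h"  // paddle_infer::PlaceType

paddle_infer::PlaceType EncodeCustomPlace() {
  // Encode: one PlaceType value per registered custom device type.
  size_t type_id = phi::GetOrRegisterGlobalDeviceTypeId("my_device");
  return static_cast<paddle_infer::PlaceType>(
      static_cast<size_t>(paddle_infer::PlaceType::kCUSTOM) + type_id);
}

size_t DecodeCustomPlace(paddle_infer::PlaceType place) {
  // Decode: subtract kCUSTOM back out; phi::GetGlobalDeviceType(id) then
  // yields the registered type name again.
  return static_cast<size_t>(place) -
         static_cast<size_t>(paddle_infer::PlaceType::kCUSTOM);
}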
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 0c68acfe9804704e833f79f66f97bd50e302f23d..bb966dc5c6c1b5b4377c54f9f615775e212675d9 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -224,8 +224,23 @@ void Tensor::CopyFromCpu(const T *data) {
         "with NPU."));
 #endif
   } else {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    auto device_type_id =
+        static_cast<size_t>(place_) - static_cast<size_t>(PlaceType::kCUSTOM);
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    paddle::platform::CustomPlace custom_place(
+        phi::GetGlobalDeviceType(device_type_id), device_);
+    auto *t_data = tensor->mutable_data<T>(custom_place);
+    auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
+        pool.Get(custom_place));
+    paddle::memory::Copy(custom_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size,
+                         dev_ctx->stream());
+#else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+#endif
   }
 }
@@ -398,8 +413,20 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
         "with NPU."));
 #endif
   } else {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    auto custom_place = t_place;
+    auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
+        pool.Get(custom_place));
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), custom_place, t_data,
+                         ele_num * sizeof(T), dev_ctx->stream());
+// TODO(wangran16): sync_stream
+#else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+#endif
   }
 }
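A usage sketch of the new copy path (tensor names and shapes are illustrative; assumes a predictor built with EnableCustomDevice). Host-to-device staging runs on the CustomDeviceContext stream added above; note the open TODO on synchronizing the device-to-host direction:

#include <vector>
#include "paddle_inference_api.h"

void RunOnCustomDevice(paddle_infer::Predictor *predictor) {
  auto input = predictor->GetInputHandle("x");
  input->Reshape({1, 3, 224, 224});
  std::vector<float> host_in(1 * 3 * 224 * 224, 1.0f);
  input->CopyFromCpu(host_in.data());  // host -> custom device

  predictor->Run();

  auto output = predictor->GetOutputHandle("out");
  int numel = 1;
  for (int d : output->shape()) numel *= d;
  std::vector<float> host_out(numel);
  output->CopyToCpu(host_out.data());  // custom device -> host
}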
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 9c48d822b4d0d881f8ba3302521024c75e27f206..8edbc494ab8865cfcd206539f035d9ae78dc26a8 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -332,6 +332,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void EnableNpu(int device_id = 0);
   ///
+  /// \brief Turn on CustomDevice.
+  ///
+  /// \param device_type The custom device type to use.
+  ///
+  /// \param device_id The id of the custom device to use (default is 0).
+  ///
+  void EnableCustomDevice(const std::string& device_type, int device_id);
+  ///
   /// \brief Turn on ONNXRuntime.
   ///
   void EnableONNXRuntime();
@@ -366,6 +374,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the IPU is turned on.
   ///
   bool use_ipu() const { return use_ipu_; }
+  /// \brief A boolean state telling whether the CustomDevice is turned on.
+  ///
+  /// \return bool Whether the CustomDevice is turned on.
+  ///
+  bool use_custom_device() const { return use_custom_device_; }
   ///
   /// \brief A boolean state telling whether the ONNXRuntime is turned on.
   ///
@@ -403,6 +416,17 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   int ipu_device_num() const { return ipu_device_num_; }
   ///
+  /// \brief Get the custom device id.
+  ///
+  /// \return int The custom device id.
+  ///
+  int custom_device_id() const { return custom_device_id_; }
+  /// \brief Get the custom device type.
+  ///
+  /// \return string The custom device type.
+  ///
+  std::string custom_device_type() const { return custom_device_type_; }
+  ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
   /// \return int The initial size in MB of the GPU memory pool.
@@ -900,6 +924,11 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_npu_{false};
   int npu_device_id_{0};
 
+  // CustomDevice related
+  bool use_custom_device_{false};
+  int custom_device_id_{0};
+  std::string custom_device_type_;
+
   // ONNXRuntime related
   bool use_onnxruntime_{false};
   bool enable_ort_optimization_{false};
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index db6bde62ddc7c9656167e6acb2b050418635012b..f01799c646077862b723937272089d5f8af01ce8 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -166,6 +166,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in ipu mode.
   bool use_ipu() const { return use_ipu_; }
 
+  /// \brief Check if we are using CustomDevice.
+  /// \return A bool variable implying whether we are in CustomDevice mode.
+  bool use_custom_device() const { return use_custom_device_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;
@@ -177,6 +181,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_ipu_{false};
   bool use_mkldnn_{false};
   bool use_gpu_fp16_{false};
+  bool use_custom_device_{false};
   /// \endcond
 };
@@ -291,6 +296,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
   }
 };
 
+/// \class CustomDevicePassStrategy
+/// \brief The CustomDevice passes controller, it is used in AnalysisPredictor
+/// with CustomDevice mode.
+class PD_INFER_DECL CustomDevicePassStrategy final : public PassStrategy {
+ public:
+  CustomDevicePassStrategy() : PassStrategy({}) { use_custom_device_ = true; }
+
+  /// \brief Construct by copying another CustomDevicePassStrategy object.
+  /// \param[in] other The CustomDevicePassStrategy object we want to copy.
+  explicit CustomDevicePassStrategy(const CustomDevicePassStrategy &other)
+      : PassStrategy(other.AllPasses()) {
+    use_custom_device_ = true;
+  }
+};
+
 /// \class IpuPassStrategy
 /// \brief The IPU passes controller, it is used in AnalysisPredictor with IPU
 /// mode.
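Design note: unlike CpuPassStrategy or GpuPassStrategy, CustomDevicePassStrategy is constructed from an empty pass list (`PassStrategy({})`), so no IR optimization passes run on custom devices by default. Passes can still be opted in per config; a fragment, with an example pass name that must exist in the pass registry:

paddle_infer::Config config;
config.EnableCustomDevice("my_device", 0);          // hypothetical plugin name
config.pass_builder()->AppendPass("fc_fuse_pass");  // opt into a specific IR pass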
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 3cd2df3aef639a4d6318fd12a5b52496bbdb6730..11086b369fc1522188c08f26589280c6508eab0a 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -54,7 +54,7 @@ enum DataType {
   // TODO(Superjomn) support more data types if needed.
 };
 
-enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU };
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
 
 enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map
index 5bb9b8d75620b25231b2b9237dd840a9909137d0..05935701635d9ca3199c767243d492f1a1868822 100644
--- a/paddle/fluid/inference/paddle_inference.map
+++ b/paddle/fluid/inference/paddle_inference.map
@@ -6,4 +6,3 @@
   local:
     *;
 };
-
diff --git a/paddle/fluid/inference/paddle_inference_custom_device.map b/paddle/fluid/inference/paddle_inference_custom_device.map
new file mode 100644
index 0000000000000000000000000000000000000000..52bc2870482e27caaae9c6d4c2296e2103dcef2f
--- /dev/null
+++ b/paddle/fluid/inference/paddle_inference_custom_device.map
@@ -0,0 +1,10 @@
+{
+  global:
+    *paddle*;
+    *Pass*;
+    *profile*;
+    *phi*;
+    *FLAGS_*;
+  local:
+    *;
+};
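The new version script additionally exports `*phi*` and `*FLAGS_*` symbols so that out-of-tree device plugins can reach phi's registration machinery inside libpaddle_inference.so. On the application side, one can guard EnableCustomDevice on the plugin actually being loaded; a sketch assuming `phi::DeviceManager::GetAllCustomDeviceTypes()` is the runtime registry query ("my_device" remains a hypothetical plugin name):

#include <algorithm>
#include "paddle/phi/backends/device_manager.h"  // assumed header
#include "paddle_inference_api.h"

bool EnableIfRegistered(paddle_infer::Config *config) {
  // Device types registered by CustomDevice plugins loaded at runtime.
  auto types = phi::DeviceManager::GetAllCustomDeviceTypes();
  if (std::find(types.begin(), types.end(), "my_device") == types.end()) {
    return false;  // plugin not loaded; keep the default place
  }
  config->EnableCustomDevice("my_device", /*device_id=*/0);
  return true;
}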