From 02e5c4befe78cad9301cffa8c5fa92401cd0cf5e Mon Sep 17 00:00:00 2001
From: ronnywang <524019753@qq.com>
Date: Tue, 10 May 2022 20:13:16 +0800
Subject: [PATCH] [CustomDevice] add inference support (#42036)

---
 paddle/fluid/inference/CMakeLists.txt         |  6 ++-
 paddle/fluid/inference/api/analysis_config.cc | 42 ++++++++++++++++++-
 .../fluid/inference/api/analysis_predictor.cc | 21 ++++++++++
 .../inference/api/details/zero_copy_tensor.cc | 27 ++++++++++++
 .../inference/api/paddle_analysis_config.h    | 29 +++++++++++++
 .../fluid/inference/api/paddle_pass_builder.h | 21 ++++++++++
 paddle/fluid/inference/api/paddle_tensor.h    |  2 +-
 paddle/fluid/inference/paddle_inference.map   |  1 -
 .../paddle_inference_custom_device.map        | 10 +++++
 9 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100644 paddle/fluid/inference/paddle_inference_custom_device.map

diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 7fae481f582..633f481df80 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -109,7 +109,11 @@ endif()
 set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference)
 if(NOT APPLE AND NOT WIN32)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
-  set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+  if (WITH_CUSTOM_DEVICE)
+    set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map")
+  else()
+    set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
+  endif()
   set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
   # check symbol hidden
   FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 4827fe6c1ac..735e1b7be4c 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -158,6 +158,19 @@ void AnalysisConfig::EnableNpu(int device_id) {
   Update();
 }
 
+void AnalysisConfig::EnableCustomDevice(const std::string &device_type,
+                                        int device_id) {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+  use_custom_device_ = true;
+  custom_device_id_ = device_id;
+  custom_device_type_ = device_type;
+#else
+  LOG(ERROR) << "Please compile with CustomDevice to EnableCustomDevice()";
+  use_custom_device_ = false;
+#endif
+  Update();
+}
+
 void AnalysisConfig::EnableIpu(int ipu_device_num, int ipu_micro_batch_size,
                                bool ipu_enable_pipelining,
                                int ipu_batches_per_step) {
@@ -324,6 +337,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // fleet exe related
   CP_MEMBER(dist_config_);
 
+  // custom device related.
+  CP_MEMBER(use_custom_device_);
+  CP_MEMBER(custom_device_type_);
+  CP_MEMBER(custom_device_id_);
+
   if (use_gpu_) {
     PADDLE_ENFORCE_EQ(use_xpu_, false,
                       platform::errors::InvalidArgument(
@@ -539,7 +557,8 @@ void AnalysisConfig::Update() {
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
       ((use_npu() ^ pass_builder_->use_npu())) ||
-      ((use_ipu() ^ pass_builder_->use_ipu()))) {
+      ((use_ipu() ^ pass_builder_->use_ipu())) ||
+      ((use_custom_device() ^ pass_builder_->use_custom_device()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
 
@@ -562,6 +581,12 @@ void AnalysisConfig::Update() {
           platform::errors::InvalidArgument(
               "Only one choice can be made between GPU and NPU."));
       pass_builder_.reset(new NpuPassStrategy);
+    } else if (use_custom_device()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and CustomDevice."));
+      pass_builder_.reset(new CustomDevicePassStrategy);
     } else {
       pass_builder_.reset(new CpuPassStrategy);
     }
@@ -588,6 +613,13 @@ void AnalysisConfig::Update() {
              "Only one choice can be made between GPU and NPU."));
       pass_builder_.reset(new NpuPassStrategy(
           *static_cast<NpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_custom_device()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and CustomDevice."));
+      pass_builder_.reset(new CustomDevicePassStrategy(
+          *static_cast<CustomDevicePassStrategy *>(pass_builder_.get())));
     } else {
       pass_builder_.reset(new CpuPassStrategy(
           *static_cast<CpuPassStrategy *>(pass_builder_.get())));
@@ -733,7 +765,13 @@ void AnalysisConfig::Update() {
           "but did not have the option -DWITH_IPU compiled."));
 #endif
   }
-
+  if (use_custom_device_) {
+#ifndef PADDLE_WITH_CUSTOM_DEVICE
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to enable the custom device "
+        "but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
+#endif
+  }
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
   }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 4f0d4a90838..0d3a687c461 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -332,6 +332,15 @@ bool AnalysisPredictor::CreateExecutor() {
     PADDLE_THROW(platform::errors::Unavailable(
         "You tried to use IPU forward propagation, but Paddle was not compiled "
         "with WITH_IPU."));
+#endif
+  } else if (config_.use_custom_device()) {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    place_ = paddle::platform::CustomPlace(config_.custom_device_type());
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use CustomDevice forward propagation, but Paddle was not "
+        "compiled "
+        "with WITH_CUSTOM_DEVICE."));
 #endif
   } else {
     place_ = paddle::platform::CPUPlace();
@@ -1241,6 +1250,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   } else if (platform::is_npu_place(place_)) {
     auto npu_place = place_;
     res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
+  } else if (platform::is_custom_place(place_)) {
+    auto custom_place = place_;
+    auto paddleplace = static_cast<PaddlePlace>(
+        static_cast<size_t>(PaddlePlace::kCUSTOM) +
+        phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
+    res->SetPlace(paddleplace, custom_place.GetDeviceId());
   } else {
     auto gpu_place = place_;
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
@@ -1290,6 +1305,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   } else if (platform::is_npu_place(place_)) {
     auto npu_place = place_;
     res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
+  } else if (platform::is_custom_place(place_)) {
+    auto custom_place = place_;
+    auto paddleplace = static_cast<PaddlePlace>(
+        static_cast<size_t>(PaddlePlace::kCUSTOM) +
+        phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
+    res->SetPlace(paddleplace, custom_place.GetDeviceId());
   } else {
     auto gpu_place = place_;
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 0c68acfe980..bb966dc5c6c 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -224,8 +224,23 @@ void Tensor::CopyFromCpu(const T *data) {
         "with NPU."));
 #endif
   } else {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    auto device_type_id =
+        static_cast<size_t>(place_) - static_cast<size_t>(PlaceType::kCUSTOM);
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    paddle::platform::CustomPlace custom_place(
+        phi::GetGlobalDeviceType(device_type_id), device_);
+    auto *t_data = tensor->mutable_data<T>(custom_place);
+    auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
+        pool.Get(custom_place));
+    paddle::memory::Copy(custom_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size,
+                         dev_ctx->stream());
+#else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+#endif
   }
 }
 
@@ -398,8 +413,20 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
         "with NPU."));
 #endif
   } else {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    auto custom_place = t_place;
+    auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
+        pool.Get(custom_place));
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), custom_place, t_data,
+                         ele_num * sizeof(T), dev_ctx->stream());
+    // TODO(wangran16): sync_stream
+#else
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "The analysis predictor supports CPU, GPU, NPU and XPU now."));
+#endif
   }
 }
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 9c48d822b4d..8edbc494ab8 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -332,6 +332,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void EnableNpu(int device_id = 0);
   ///
+  /// \brief Turn on CustomDevice.
+  ///
+  /// \param device_type The custom device type to use.
+  ///
+  /// \param device_id The custom device id to use (default is 0).
+  ///
+  void EnableCustomDevice(const std::string& device_type, int device_id);
+  ///
   /// \brief Turn on ONNXRuntime.
   ///
   void EnableONNXRuntime();
@@ -366,6 +374,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the IPU is turned on.
   ///
   bool use_ipu() const { return use_ipu_; }
+  /// \brief A boolean state telling whether the CustomDevice is turned on.
+  ///
+  /// \return bool Whether the CustomDevice is turned on.
+  ///
+  bool use_custom_device() const { return use_custom_device_; }
   ///
   /// \brief A boolean state telling whether the ONNXRuntime is turned on.
   ///
@@ -403,6 +416,17 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   int ipu_device_num() const { return ipu_device_num_; }
   ///
+  /// \brief Get the custom device id.
+  ///
+  /// \return int The custom device id.
+  ///
+  int custom_device_id() const { return custom_device_id_; }
+  /// \brief Get the custom device type.
+  ///
+  /// \return string The custom device type.
+  ///
+  std::string custom_device_type() const { return custom_device_type_; }
+  ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
   /// \return int The initial size in MB of the GPU memory pool.
@@ -900,6 +924,11 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_npu_{false};
   int npu_device_id_{0};
 
+  // CustomDevice related
+  bool use_custom_device_{false};
+  int custom_device_id_{0};
+  std::string custom_device_type_;
+
   // ONNXRuntime related
   bool use_onnxruntime_{false};
   bool enable_ort_optimization_{false};
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index db6bde62ddc..f01799c6460 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -166,6 +166,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in ipu mode.
   bool use_ipu() const { return use_ipu_; }
 
+  /// \brief Check if we are using CustomDevice.
+  /// \return A bool variable implying whether we are in CustomDevice mode.
+  bool use_custom_device() const { return use_custom_device_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;
 
@@ -177,6 +181,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_ipu_{false};
   bool use_mkldnn_{false};
   bool use_gpu_fp16_{false};
+  bool use_custom_device_{false};
   /// \endcond
 };
 
@@ -291,6 +296,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
   }
 };
 
+/// \class CustomDevicePassStrategy
+/// \brief The CustomDevice passes controller, it is used in AnalysisPredictor
+/// with CustomDevice
+/// mode.
+class PD_INFER_DECL CustomDevicePassStrategy final : public PassStrategy {
+ public:
+  CustomDevicePassStrategy() : PassStrategy({}) { use_custom_device_ = true; }
+
+  /// \brief Construct by copying another CustomDevicePassStrategy object.
+  /// \param[in] other The CustomDevicePassStrategy object we want to copy.
+  explicit CustomDevicePassStrategy(const CustomDevicePassStrategy &other)
+      : PassStrategy(other.AllPasses()) {
+    use_custom_device_ = true;
+  }
+};
+
 /// \class IpuPassStrategy
 /// \brief The IPU passes controller, it is used in AnalysisPredictor with IPU
 /// mode.
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 3cd2df3aef6..11086b369fc 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -54,7 +54,7 @@ enum DataType {
   // TODO(Superjomn) support more data types if needed.
 };
 
-enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU };
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
 
 enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
 
diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map
index 5bb9b8d7562..05935701635 100644
--- a/paddle/fluid/inference/paddle_inference.map
+++ b/paddle/fluid/inference/paddle_inference.map
@@ -6,4 +6,3 @@
   local:
     *;
 };
-
diff --git a/paddle/fluid/inference/paddle_inference_custom_device.map b/paddle/fluid/inference/paddle_inference_custom_device.map
new file mode 100644
index 00000000000..52bc2870482
--- /dev/null
+++ b/paddle/fluid/inference/paddle_inference_custom_device.map
@@ -0,0 +1,10 @@
+{
+  global:
+    *paddle*;
+    *Pass*;
+    *profile*;
+    *phi*;
+    *FLAGS_*;
+  local:
+    *;
+};
-- 
GitLab
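
Usage note (not part of the commit): the snippet below is a minimal sketch of how the entry points added by this patch fit together, using the standard paddle_infer C++ API. It assumes a libpaddle_inference built with -DWITH_CUSTOM_DEVICE=ON and a custom-device runtime plugin already registered; the device type "custom_cpu", the model directory, and the tensor shape are placeholders, not values defined by this patch.

// demo.cc -- sketch only; not shipped with this patch.
#include <functional>
#include <numeric>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./mobilenet_v1");  // placeholder model directory

  // Route execution to the plugged-in backend; the type string must match
  // whatever name the custom-device runtime registered ("custom_cpu" here
  // is an assumption).
  config.EnableCustomDevice("custom_cpu", /*device_id=*/0);

  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed one dummy input through the zero-copy tensor path; the
  // CopyFromCpu/CopyToCpu branches added by this patch perform the
  // host <-> custom-device transfers.
  auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  std::vector<float> in(1 * 3 * 224 * 224, 1.0f);  // placeholder shape
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(in.data());

  predictor->Run();

  auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  auto shape = output->shape();
  int numel = std::accumulate(shape.begin(), shape.end(), 1,
                              std::multiplies<int>());
  std::vector<float> out(numel);
  output->CopyToCpu(out.data());
  return 0;
}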