From 4e3fb2198678e1f206d146e46af3f05d97f6a17e Mon Sep 17 00:00:00 2001
From: Wilber
Date: Wed, 14 Jul 2021 19:03:22 +0800
Subject: [PATCH] Inference support Ascend910 (#34101)

---
 paddle/fluid/inference/api/analysis_config.cc |  57 ++++++++++++++++++-
 .../fluid/inference/api/analysis_predictor.cc |  14 +++++
 paddle/fluid/inference/api/api_impl.cc        |  19 ++++++-
 paddle/fluid/inference/api/api_impl_tester.cc |  13 +++++
 .../inference/api/details/zero_copy_tensor.cc |  37 +++++++++++-
 .../inference/api/paddle_analysis_config.h    |  27 ++++++++-
 paddle/fluid/inference/api/paddle_api.h       |   1 +
 .../fluid/inference/api/paddle_pass_builder.h |  22 ++++++-
 paddle/fluid/inference/api/paddle_tensor.h    |   2 +-
 paddle/fluid/inference/capi_exp/pd_config.cc  |  14 +++++
 paddle/fluid/inference/capi_exp/pd_config.h   |  24 ++++++++
 paddle/fluid/inference/goapi/config.go        |  27 +++++++++
 paddle/fluid/inference/tests/test_helper.h    |   3 +
 paddle/fluid/pybind/inference_api.cc          |   7 ++-
 14 files changed, 257 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index b5ca0ef5924..58b0a3536a4 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -36,6 +36,8 @@ PassStrategy *AnalysisConfig::pass_builder() const {
       pass_builder_.reset(new GpuPassStrategy);
     } else if (use_xpu_) {
       pass_builder_.reset(new XpuPassStrategy);
+    } else if (use_npu_) {
+      pass_builder_.reset(new NpuPassStrategy);
     } else {
       LOG(INFO) << "Create CPU IR passes";
       pass_builder_.reset(new CpuPassStrategy);
@@ -110,6 +112,18 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
   Update();
 }
 
+void AnalysisConfig::EnableNpu(int device_id) {
+#ifdef PADDLE_WITH_ASCEND_CL
+  use_npu_ = true;
+  npu_device_id_ = device_id;
+#else
+  LOG(ERROR) << "Please compile with NPU to use EnableNpu()";
+  use_npu_ = false;
+#endif
+
+  Update();
+}
+
 AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 #define CP_MEMBER(member__) member__ = other.member__;
 
@@ -127,7 +141,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_gpu_);
   CP_MEMBER(use_cudnn_);
   CP_MEMBER(gpu_device_id_);
-  CP_MEMBER(xpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);
 
   CP_MEMBER(enable_memory_optim_);
@@ -167,7 +180,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(lite_ops_filter_);
   CP_MEMBER(lite_zero_copy_);
 
+  // XPU related.
   CP_MEMBER(use_xpu_);
+  CP_MEMBER(xpu_device_id_);
   CP_MEMBER(xpu_l3_workspace_size_);
   CP_MEMBER(xpu_locked_);
   CP_MEMBER(xpu_autotune_);
@@ -175,6 +190,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(xpu_precision_);
   CP_MEMBER(xpu_adaptive_seqlen_);
 
+  // NPU related.
+  CP_MEMBER(use_npu_);
+  CP_MEMBER(npu_device_id_);
+
   // profile related.
   CP_MEMBER(with_profile_);
 
@@ -202,6 +221,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   } else if (use_xpu_) {
     pass_builder_.reset(new XpuPassStrategy(
         *static_cast<XpuPassStrategy *>(other.pass_builder())));
+  } else if (use_npu_) {
+    pass_builder_.reset(new NpuPassStrategy(
+        *static_cast<NpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
@@ -376,7 +398,9 @@ void AnalysisConfig::Update() {
   if (info == serialized_info_cache_) return;
 
   // Transfer pass_builder and copy the existing compatible passes.
-  if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu()))) {
+  if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
+      ((use_xpu() ^ pass_builder_->use_xpu())) ||
+      ((use_npu() ^ pass_builder_->use_npu()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
 
@@ -390,6 +414,12 @@ void AnalysisConfig::Update() {
           platform::errors::InvalidArgument(
               "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy);
+    } else if (use_npu()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and NPU."));
+      pass_builder_.reset(new NpuPassStrategy);
     } else {
       pass_builder_.reset(new CpuPassStrategy);
     }
@@ -405,6 +435,13 @@ void AnalysisConfig::Update() {
               "Only one choice can be made between CPU and XPU."));
       pass_builder_.reset(new XpuPassStrategy(
           *static_cast<XpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_npu()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between GPU and NPU."));
+      pass_builder_.reset(new NpuPassStrategy(
+          *static_cast<NpuPassStrategy *>(pass_builder_.get())));
     } else {
       pass_builder_.reset(new CpuPassStrategy(
           *static_cast<CpuPassStrategy *>(pass_builder_.get())));
@@ -502,6 +539,19 @@ void AnalysisConfig::Update() {
 #endif
   }
 
+  if (use_npu_) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    PADDLE_ENFORCE_EQ(use_gpu_, false,
+                      platform::errors::Unavailable(
+                          "Currently, NPU and GPU cannot be enabled in the "
+                          "same analysis configuration."));
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use an NPU device, but Paddle was not compiled "
+        "with NPU-runtime."));
+#endif
+  }
+
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
   }
@@ -566,6 +616,9 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << xpu_precision_;
   ss << xpu_adaptive_seqlen_;
 
+  ss << use_npu_;
+  ss << npu_device_id_;
+
   ss << thread_local_stream_;
 
   return ss.str();
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 1aa46ab5713..dd3a33130a3 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -264,6 +264,14 @@ bool AnalysisPredictor::CreateExecutor() {
           "with WITH_XPU."));
 #endif  // PADDLE_WITH_XPU
     }
+  } else if (config_.use_npu()) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    place_ = paddle::platform::NPUPlace(config_.npu_device_id());
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use NPU forward propagation, but Paddle was not compiled "
+        "with WITH_ASCEND_CL."));
+#endif
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -847,6 +855,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
       auto xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
+  } else if (platform::is_npu_place(place_)) {
+    auto npu_place = BOOST_GET_CONST(platform::NPUPlace, place_);
+    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else {
     auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
@@ -879,6 +890,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
       auto xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
+  } else if (platform::is_npu_place(place_)) {
+    auto npu_place = BOOST_GET_CONST(platform::NPUPlace, place_);
+    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else {
     auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 6930b3bd2e9..bb104015947 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/inference/api/api_impl.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 
 DEFINE_bool(profile, false, "Turn on profiler for fluid");
@@ -78,6 +79,8 @@ bool NativePaddlePredictor::Init(
     place_ = paddle::platform::CUDAPlace(config_.device);
   } else if (config_.use_xpu) {
     place_ = paddle::platform::XPUPlace(config_.device);
+  } else if (config_.use_npu) {
+    place_ = paddle::platform::NPUPlace(config_.device);
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -255,7 +258,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       PADDLE_THROW(platform::errors::Unavailable(
           "Not compile with CUDA, should not reach here."));
 #endif
-    } else {
+    } else if (platform::is_xpu_place(place_)) {
 #ifdef PADDLE_WITH_XPU
       auto dst_xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
       memory::Copy(dst_xpu_place, static_cast<void *>(input_ptr),
                    platform::CPUPlace(), inputs[i].data.data(),
                    inputs[i].data.length());
 #else
       PADDLE_THROW(platform::errors::Unavailable(
           "Not compile with XPU, should not reach here."));
+#endif
+    } else {
+#ifdef PADDLE_WITH_ASCEND_CL
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto *dev_ctx =
+          static_cast<const platform::NPUDeviceContext *>(pool.Get(place_));
+      auto dst_npu_place = BOOST_GET_CONST(platform::NPUPlace, place_);
+      memory::Copy(dst_npu_place, static_cast<void *>(input_ptr),
+                   platform::CPUPlace(), inputs[i].data.data(),
+                   inputs[i].data.length(), dev_ctx->stream());
+#else
+      PADDLE_THROW(platform::errors::Unavailable(
+          "Not compiled with NPU, should not reach here."));
 #endif
     }
diff --git a/paddle/fluid/inference/api/api_impl_tester.cc b/paddle/fluid/inference/api/api_impl_tester.cc
index e3fad1fec06..89aec34110b 100644
--- a/paddle/fluid/inference/api/api_impl_tester.cc
+++ b/paddle/fluid/inference/api/api_impl_tester.cc
@@ -67,6 +67,7 @@ void MainWord2Vec(const paddle::PaddlePlace& place) {
   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
   config.use_gpu = paddle::gpu_place_used(place);
   config.use_xpu = paddle::xpu_place_used(place);
+  config.use_npu = paddle::npu_place_used(place);
 
   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -119,6 +120,7 @@ void MainImageClassification(const paddle::PaddlePlace& place) {
   NativeConfig config = GetConfig();
   config.use_gpu = paddle::gpu_place_used(place);
   config.use_xpu = paddle::xpu_place_used(place);
+  config.use_npu = paddle::npu_place_used(place);
   config.model_dir =
       FLAGS_book_dirname + "/image_classification_resnet.inference.model";
 
@@ -163,6 +165,7 @@ void MainThreadsWord2Vec(const paddle::PaddlePlace& place) {
   NativeConfig config = GetConfig();
   config.use_gpu = paddle::gpu_place_used(place);
   config.use_xpu = paddle::xpu_place_used(place);
+  config.use_npu = paddle::npu_place_used(place);
   auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
 
   // prepare inputs data and reference results
@@ -227,6 +230,7 @@ void MainThreadsImageClassification(const paddle::PaddlePlace& place) {
   NativeConfig config = GetConfig();
   config.use_gpu = paddle::gpu_place_used(place);
   config.use_xpu = paddle::xpu_place_used(place);
+  config.use_npu = paddle::npu_place_used(place);
   config.model_dir =
       FLAGS_book_dirname + "/image_classification_resnet.inference.model";
 
@@ -297,6 +301,15 @@ TEST(inference_api_native, image_classification_xpu) {
 }
 #endif
 
+#ifdef PADDLE_WITH_ASCEND_CL
+TEST(inference_api_native, word2vec_npu) {
+  MainWord2Vec(paddle::PaddlePlace::kNPU);
+}
+// TEST(inference_api_native, image_classification_npu) {
+//   MainImageClassification(paddle::PaddlePlace::kNPU);
+// }
+#endif
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 TEST(inference_api_native, word2vec_gpu) {
   MainWord2Vec(paddle::PaddlePlace::kGPU);
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 313cbfb7c78..5ed6691ebb8 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -16,6 +16,7 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/enforce.h"
 
@@ -150,10 +151,26 @@ void Tensor::CopyFromCpu(const T *data) {
     PADDLE_THROW(paddle::platform::errors::Unavailable(
         "Can not create tensor with XPU place because paddle is not compiled "
         "with XPU."));
+#endif
+  } else if (place_ == PlaceType::kNPU) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    paddle::platform::NPUPlace npu_place(device_);
+    auto *t_data = tensor->mutable_data<T>(npu_place);
+    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
+        pool.Get(npu_place));
+    paddle::memory::Copy(npu_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size,
+                         dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with NPU place because paddle is not compiled "
+        "with NPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU and XPU now."));
+        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
   }
 }
 
@@ -212,10 +229,26 @@ void Tensor::CopyToCpu(T *data) {
     PADDLE_THROW(paddle::platform::errors::Unavailable(
         "Can not create tensor with XPU place because paddle is not compiled "
         "with XPU."));
+#endif
+  } else if (place_ == PlaceType::kNPU) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    auto npu_place = BOOST_GET_CONST(paddle::platform::NPUPlace, t_place);
+    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
+        pool.Get(npu_place));
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), npu_place, t_data,
+                         ele_num * sizeof(T), dev_ctx->stream());
+    aclrtSynchronizeStream(dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with NPU place because paddle is not compiled "
+        "with NPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU and XPU now."));
+        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
   }
 }
 
 template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 81e742e8a6f..58d02d8d1e7 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -203,6 +203,12 @@ struct PD_INFER_DECL AnalysisConfig {
                  const std::string& precision = "int16",
                  bool adaptive_seqlen = false);
   ///
+  /// \brief Turn on NPU.
+  ///
+  /// \param device_id the NPU card to use (default is 0).
+  ///
+  void EnableNpu(int device_id = 0);
+  ///
   /// \brief A boolean state telling whether the GPU is turned on.
   ///
   /// \return bool Whether the GPU is turned on.
@@ -215,6 +221,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool use_xpu() const { return use_xpu_; }
   ///
+  /// \brief A boolean state telling whether the NPU is turned on.
+  ///
+  /// \return bool Whether the NPU is turned on.
+  ///
+  bool use_npu() const { return use_npu_; }
+  ///
   /// \brief Get the GPU device id.
   ///
   /// \return int The GPU device id.
@@ -227,6 +239,12 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   int xpu_device_id() const { return xpu_device_id_; }
   ///
+  /// \brief Get the NPU device id.
+  ///
+  /// \return int The NPU device id.
+  ///
+  int npu_device_id() const { return npu_device_id_; }
+  ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
   /// \return int The initial size in MB of the GPU memory pool.
@@ -619,11 +637,15 @@ struct PD_INFER_DECL AnalysisConfig {
   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
-  int xpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+  bool thread_local_stream_{false};
 
   bool use_cudnn_{false};
 
+  // NPU related
+  bool use_npu_{false};
+  int npu_device_id_{0};
+
   // Padding related
   bool use_fc_padding_{true};
 
@@ -689,8 +711,9 @@ struct PD_INFER_DECL AnalysisConfig {
   Precision lite_precision_mode_;
   bool lite_zero_copy_;
 
-  bool thread_local_stream_{false};
+  // XPU related.
   bool use_xpu_{false};
+  int xpu_device_id_{0};
   int xpu_l3_workspace_size_;
   bool xpu_locked_;
   bool xpu_autotune_;
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index 3e92ffaf9dc..de6b28de275 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -303,6 +303,7 @@ struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
   /// GPU related fields.
   bool use_xpu{false};
   bool use_gpu{false};
+  bool use_npu{false};
   int device{0};
   float fraction_of_gpu_memory{
       -1.f};  ///< Change to a float in (0,1] if needed.
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index d7556b50031..f25060cd091 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -144,6 +144,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in xpu mode.
   bool use_xpu() const { return use_xpu_; }
 
+  /// \brief Check if we are using npu.
+  /// \return A bool variable implying whether we are in npu mode.
+  bool use_npu() const { return use_npu_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;
 
@@ -151,6 +155,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \cond Protected
   bool use_xpu_{false};
   bool use_gpu_{false};
+  bool use_npu_{false};
   bool use_mkldnn_{false};
   /// \endcond
 };
@@ -236,7 +241,22 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
 /// mode.
 class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
  public:
-  XpuPassStrategy() : PassStrategy({}) {}
+  XpuPassStrategy() : PassStrategy({}) { use_xpu_ = true; }
+};
+
+/// \class NpuPassStrategy
+/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
+/// mode.
+class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
+ public:
+  NpuPassStrategy() : PassStrategy({}) { use_npu_ = true; }
+
+  /// \brief Construct by copying another NpuPassStrategy object.
+  /// \param[in] other The NpuPassStrategy object we want to copy.
+  explicit NpuPassStrategy(const NpuPassStrategy &other)
+      : PassStrategy(other.AllPasses()) {
+    use_npu_ = true;
+  }
 };
 
 /// \brief List of tensorRT subgraph passes.
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
index 9c4e5858af3..fa3067b62d6 100644
--- a/paddle/fluid/inference/api/paddle_tensor.h
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -28,7 +28,7 @@ enum DataType {
   // TODO(Superjomn) support more data types if needed.
 };
 
-enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU };
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU };
 
 /// \brief Represents an n-dimensional array of values.
 /// The Tensor is used to store the input or output of the network.
diff --git a/paddle/fluid/inference/capi_exp/pd_config.cc b/paddle/fluid/inference/capi_exp/pd_config.cc
index e9104ef5237..bd96f401233 100644
--- a/paddle/fluid/inference/capi_exp/pd_config.cc
+++ b/paddle/fluid/inference/capi_exp/pd_config.cc
@@ -135,11 +135,21 @@ void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
                         precision, adaptive_seqlen);
 }
 
+void PD_ConfigEnableNpu(__pd_keep PD_Config* pd_config, int32_t device_id) {
+  CHECK_AND_CONVERT_PD_CONFIG;
+  config->EnableNpu(device_id);
+}
+
 PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->use_xpu();
 }
 
+PD_Bool PD_ConfigUseNpu(__pd_keep PD_Config* pd_config) {
+  CHECK_AND_CONVERT_PD_CONFIG;
+  return config->use_npu();
+}
+
 int32_t PD_ConfigGpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->gpu_device_id();
@@ -148,6 +158,10 @@ int32_t PD_ConfigXpuDeviceId(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->xpu_device_id();
 }
+int32_t PD_ConfigNpuDeviceId(__pd_keep PD_Config* pd_config) {
+  CHECK_AND_CONVERT_PD_CONFIG;
+  return config->npu_device_id();
+}
 int32_t PD_ConfigMemoryPoolInitSizeMb(__pd_keep PD_Config* pd_config) {
   CHECK_AND_CONVERT_PD_CONFIG;
   return config->memory_pool_init_size_mb();
diff --git a/paddle/fluid/inference/capi_exp/pd_config.h b/paddle/fluid/inference/capi_exp/pd_config.h
index a47ca5d2768..ac0ed8c8689 100644
--- a/paddle/fluid/inference/capi_exp/pd_config.h
+++ b/paddle/fluid/inference/capi_exp/pd_config.h
@@ -177,6 +177,14 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
     PD_Bool autotune, const char* autotune_file, const char* precision,
     PD_Bool adaptive_seqlen);
 ///
+/// \brief Turn on NPU.
+///
+/// \param[in] pd_config config
+/// \param[in] device_id the NPU card to use.
+///
+PADDLE_CAPI_EXPORT extern void PD_ConfigEnableNpu(
+    __pd_keep PD_Config* pd_config, int32_t device_id);
+///
 /// \brief A boolean state telling whether the XPU is turned on.
 ///
 /// \param[in] pd_onfig config
@@ -185,6 +193,14 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
 PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseXpu(
     __pd_keep PD_Config* pd_config);
 ///
+/// \brief A boolean state telling whether the NPU is turned on.
+///
+/// \param[in] pd_config config
+/// \return Whether the NPU is turned on.
+///
+PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseNpu(
+    __pd_keep PD_Config* pd_config);
+///
 /// \brief Get the GPU device id.
 ///
 /// \param[in] pd_onfig config
 /// \return The GPU device id.
 ///
 PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
     __pd_keep PD_Config* pd_config);
@@ -201,6 +217,14 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
 PADDLE_CAPI_EXPORT extern int32_t PD_ConfigXpuDeviceId(
     __pd_keep PD_Config* pd_config);
 ///
+/// \brief Get the NPU device id.
+///
+/// \param[in] pd_config config
+/// \return The NPU device id.
+///
+PADDLE_CAPI_EXPORT extern int32_t PD_ConfigNpuDeviceId(
+    __pd_keep PD_Config* pd_config);
+///
 /// \brief Get the initial size in MB of the GPU memory pool.
 ///
 /// \param[in] pd_onfig config
diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go
index 9200de3d08f..866ae0e38b7 100644
--- a/paddle/fluid/inference/goapi/config.go
+++ b/paddle/fluid/inference/goapi/config.go
@@ -181,6 +181,15 @@ func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune boo
 	cAutotuneFile, cPrecision, cvtGoBoolToPD(adaptiveSeqlen))
 }
 
+///
+/// \brief Turn on NPU.
+///
+/// \param deviceId the NPU card to use.
+///
+func (config *Config) EnableNpu(deviceId int32) {
+	C.PD_ConfigEnableNpu(config.c, C.int32_t(deviceId))
+}
+
 ///
 /// \brief A boolean state telling whether the GPU is turned on.
 ///
@@ -199,6 +208,15 @@ func (config *Config) UseXpu() bool {
 	return cvtPDBoolToGo(C.PD_ConfigUseXpu(config.c))
 }
 
+///
+/// \brief A boolean state telling whether the NPU is turned on.
+///
+/// \return bool Whether the NPU is turned on.
+///
+func (config *Config) UseNpu() bool {
+	return cvtPDBoolToGo(C.PD_ConfigUseNpu(config.c))
+}
+
 ///
 /// \brief Get the GPU device id.
 ///
@@ -217,6 +235,15 @@ func (config *Config) XpuDeviceId() int32 {
 	return int32(C.PD_ConfigXpuDeviceId(config.c))
 }
 
+///
+/// \brief Get the NPU device id.
+///
+/// \return int32 The NPU device id.
+///
+func (config *Config) NpuDeviceId() int32 {
+	return int32(C.PD_ConfigNpuDeviceId(config.c))
+}
+
 ///
 /// \brief Get the initial size in MB of the GPU memory pool.
 ///
diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h
index fc2c6a030a6..cf8a32ba94a 100644
--- a/paddle/fluid/inference/tests/test_helper.h
+++ b/paddle/fluid/inference/tests/test_helper.h
@@ -34,6 +34,9 @@ bool gpu_place_used(const paddle::PaddlePlace& place) {
 bool xpu_place_used(const paddle::PaddlePlace& place) {
   return place == paddle::PaddlePlace::kXPU;
 }
+bool npu_place_used(const paddle::PaddlePlace& place) {
+  return place == paddle::PaddlePlace::kNPU;
+}
 bool cpu_place_used(const paddle::PaddlePlace& place) {
   return place == paddle::PaddlePlace::kCPU;
 }
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index b2572e5aa4b..6a949ba2a60 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -379,7 +379,8 @@ void BindPaddlePlace(py::module *m) {
       .value("UNK", PaddlePlace::kUNK)
       .value("CPU", PaddlePlace::kCPU)
       .value("GPU", PaddlePlace::kGPU)
-      .value("XPU", PaddlePlace::kXPU);
+      .value("XPU", PaddlePlace::kXPU)
+      .value("NPU", PaddlePlace::kNPU);
 }
 
 void BindPaddlePredictor(py::module *m) {
@@ -409,6 +410,7 @@ void BindNativeConfig(py::module *m) {
       .def(py::init<>())
      .def_readwrite("use_gpu", &NativeConfig::use_gpu)
       .def_readwrite("use_xpu", &NativeConfig::use_xpu)
+      .def_readwrite("use_npu", &NativeConfig::use_npu)
       .def_readwrite("device", &NativeConfig::device)
       .def_readwrite("fraction_of_gpu_memory",
                      &NativeConfig::fraction_of_gpu_memory)
@@ -471,11 +473,14 @@ void BindAnalysisConfig(py::module *m) {
           py::arg("locked") = false, py::arg("autotune") = true,
           py::arg("autotune_file") = "", py::arg("precision") = "int16",
           py::arg("adaptive_seqlen") = false)
+      .def("enable_npu", &AnalysisConfig::EnableNpu, py::arg("device_id") = 0)
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
       .def("use_gpu", &AnalysisConfig::use_gpu)
       .def("use_xpu", &AnalysisConfig::use_xpu)
+      .def("use_npu", &AnalysisConfig::use_npu)
       .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
       .def("xpu_device_id", &AnalysisConfig::xpu_device_id)
+      .def("npu_device_id", &AnalysisConfig::npu_device_id)
       .def("memory_pool_init_size_mb",
            &AnalysisConfig::memory_pool_init_size_mb)
       .def("fraction_of_gpu_memory_for_pool",
-- 
GitLab
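
Usage sketch (not part of the patch above): a minimal example of how the new NPU switches fit together end to end, from AnalysisConfig::EnableNpu through the host/NPU copies added in zero_copy_tensor.cc. It assumes a Paddle build with PADDLE_WITH_ASCEND_CL and uses the standard paddle_infer C++ API that paddle_inference_api.h exposes; the model path and input shape are hypothetical placeholders.

    #include <vector>

    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    int main() {
      paddle_infer::Config config;
      // Hypothetical model files; substitute a real inference model.
      config.SetModel("./mobilenet/__model__", "./mobilenet/__params__");
      config.EnableNpu(/*device_id=*/0);  // new in this patch; selects NpuPassStrategy

      auto predictor = paddle_infer::CreatePredictor(config);

      // Host-side input; CopyFromCpu performs the CPU -> NPU copy on the
      // NPUDeviceContext stream (see zero_copy_tensor.cc above).
      std::vector<int> shape{1, 3, 224, 224};  // hypothetical input shape
      std::vector<float> input_data(1 * 3 * 224 * 224, 0.f);
      auto input_names = predictor->GetInputNames();
      auto input = predictor->GetInputHandle(input_names[0]);
      input->Reshape(shape);
      input->CopyFromCpu(input_data.data());

      predictor->Run();

      // CopyToCpu copies NPU -> CPU and waits on aclrtSynchronizeStream.
      auto output_names = predictor->GetOutputNames();
      auto output = predictor->GetOutputHandle(output_names[0]);
      int out_num = 1;
      for (int d : output->shape()) out_num *= d;
      std::vector<float> output_data(out_num);
      output->CopyToCpu(output_data.data());
      return 0;
    }

The same flow is available from the other bindings the patch touches: C via PD_ConfigEnableNpu, Go via Config.EnableNpu, and Python via AnalysisConfig.enable_npu(device_id=0).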