未验证 提交 02e5c4be 编写于 作者: R ronnywang 提交者: GitHub

[CustomDevice] add inference support (#42036)

上级 83a4b26a
......@@ -109,7 +109,11 @@ endif()
set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference)
if(NOT APPLE AND NOT WIN32)
# TODO(liuyiqun): Temporarily disable the link flag because it is not supported on Mac.
if (WITH_CUSTOM_DEVICE)
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map")
else()
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
endif()
set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
# check symbol hidden
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake
......
......@@ -158,6 +158,19 @@ void AnalysisConfig::EnableNpu(int device_id) {
Update();
}
// Selects a custom (plug-in) device as the inference backend.
//
// When the library is built with PADDLE_WITH_CUSTOM_DEVICE, the requested
// device type and id are recorded and the custom-device flag is raised.
// Otherwise an error is logged and the flag is forced off. In both cases
// Update() is invoked afterwards so the config is refreshed.
void AnalysisConfig::EnableCustomDevice(const std::string &device_type,
                                        int device_id) {
#ifndef PADDLE_WITH_CUSTOM_DEVICE
  // No CustomDevice support compiled in: report it and keep the feature off.
  LOG(ERROR) << "Please compile with CustomDevice to EnableCustomDevice()";
  use_custom_device_ = false;
#else
  custom_device_type_ = device_type;
  custom_device_id_ = device_id;
  use_custom_device_ = true;
#endif

  Update();
}
void AnalysisConfig::EnableIpu(int ipu_device_num, int ipu_micro_batch_size,
bool ipu_enable_pipelining,
int ipu_batches_per_step) {
......@@ -324,6 +337,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// fleet exe related
CP_MEMBER(dist_config_);
// custom device related.
CP_MEMBER(use_custom_device_);
CP_MEMBER(custom_device_type_);
CP_MEMBER(custom_device_id_);
if (use_gpu_) {
PADDLE_ENFORCE_EQ(use_xpu_, false,
platform::errors::InvalidArgument(
......@@ -539,7 +557,8 @@ void AnalysisConfig::Update() {
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
((use_xpu() ^ pass_builder_->use_xpu())) ||
((use_npu() ^ pass_builder_->use_npu())) ||
((use_ipu() ^ pass_builder_->use_ipu()))) {
((use_ipu() ^ pass_builder_->use_ipu())) ||
((use_custom_device() ^ pass_builder_->use_custom_device()))) {
if (use_gpu()) {
pass_builder_.reset(new GpuPassStrategy);
......@@ -562,6 +581,12 @@ void AnalysisConfig::Update() {
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and NPU."));
pass_builder_.reset(new NpuPassStrategy);
} else if (use_custom_device()) {
PADDLE_ENFORCE_EQ(
use_gpu(), false,
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and CustomDevice."));
pass_builder_.reset(new CustomDevicePassStrategy);
} else {
pass_builder_.reset(new CpuPassStrategy);
}
......@@ -588,6 +613,13 @@ void AnalysisConfig::Update() {
"Only one choice can be made between GPU and NPU."));
pass_builder_.reset(new NpuPassStrategy(
*static_cast<NpuPassStrategy *>(pass_builder_.get())));
} else if (use_custom_device()) {
PADDLE_ENFORCE_EQ(
use_gpu(), false,
platform::errors::InvalidArgument(
"Only one choice can be made between GPU and CustomDevice."));
pass_builder_.reset(new CustomDevicePassStrategy(
*static_cast<CustomDevicePassStrategy *>(pass_builder_.get())));
} else {
pass_builder_.reset(new CpuPassStrategy(
*static_cast<CpuPassStrategy *>(pass_builder_.get())));
......@@ -733,7 +765,13 @@ void AnalysisConfig::Update() {
"but did not have the option -DWITH_IPU compiled."));
#endif
}
if (use_custom_device_) {
#ifndef PADDLE_WITH_CUSTOM_DEVICE
PADDLE_THROW(platform::errors::Unavailable(
"You tried to enable the custom device "
"but did not have the option -DWITH_CUSTOM_DEVICE compiled."));
#endif
}
if (ir_debug_) {
pass_builder()->TurnOnDebug();
}
......
......@@ -332,6 +332,15 @@ bool AnalysisPredictor::CreateExecutor() {
PADDLE_THROW(platform::errors::Unavailable(
"You tried to use IPU forward propagation, but Paddle was not compiled "
"with WITH_IPU."));
#endif
} else if (config_.use_custom_device()) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Build the place from both the device type and the device id chosen via
// AnalysisConfig::EnableCustomDevice(); constructing it from the type alone
// would ignore the id the user asked for.
place_ = paddle::platform::CustomPlace(config_.custom_device_type(),
                                       config_.custom_device_id());
#else
// Custom-device inference was requested but support was not compiled in.
PADDLE_THROW(platform::errors::Unavailable(
    "You tried to use CustomDevice forward propagation, but Paddle was not "
    "compiled "
    "with WITH_CUSTOM_DEVICE."));
#endif
} else {
place_ = paddle::platform::CPUPlace();
......@@ -1241,6 +1250,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
} else if (platform::is_npu_place(place_)) {
auto npu_place = place_;
res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
} else if (platform::is_custom_place(place_)) {
auto custom_place = place_;
auto paddleplace = static_cast<PaddlePlace>(
static_cast<size_t>(PaddlePlace::kCUSTOM) +
phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
res->SetPlace(paddleplace, custom_place.GetDeviceId());
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
......@@ -1290,6 +1305,12 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
} else if (platform::is_npu_place(place_)) {
auto npu_place = place_;
res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
} else if (platform::is_custom_place(place_)) {
auto custom_place = place_;
auto paddleplace = static_cast<PaddlePlace>(
static_cast<size_t>(PaddlePlace::kCUSTOM) +
phi::GetOrRegisterGlobalDeviceTypeId(place_.GetDeviceType()));
res->SetPlace(paddleplace, custom_place.GetDeviceId());
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
......
......@@ -224,8 +224,23 @@ void Tensor::CopyFromCpu(const T *data) {
"with NPU."));
#endif
} else {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
auto device_type_id =
static_cast<size_t>(place_) - static_cast<size_t>(PlaceType::kCUSTOM);
paddle::platform::DeviceContextPool &pool =
paddle::platform::DeviceContextPool::Instance();
paddle::platform::CustomPlace custom_place(
phi::GetGlobalDeviceType(device_type_id), device_);
auto *t_data = tensor->mutable_data<T>(custom_place);
auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
pool.Get(custom_place));
paddle::memory::Copy(custom_place, static_cast<void *>(t_data),
paddle::platform::CPUPlace(), data, ele_size,
dev_ctx->stream());
#else
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
#endif
}
}
......@@ -398,8 +413,20 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
"with NPU."));
#endif
} else {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
paddle::platform::DeviceContextPool &pool =
paddle::platform::DeviceContextPool::Instance();
auto custom_place = t_place;
auto *dev_ctx = static_cast<const paddle::platform::CustomDeviceContext *>(
pool.Get(custom_place));
paddle::memory::Copy(paddle::platform::CPUPlace(),
static_cast<void *>(data), custom_place, t_data,
ele_num * sizeof(T), dev_ctx->stream());
// TODO(wangran16): sync_stream
#else
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
#endif
}
}
......
......@@ -332,6 +332,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableNpu(int device_id = 0);
///
/// \brief Turn on CustomDevice.
///
/// \param device_type The type name of the custom device to use.
///
/// \param device_id The id of the custom device to use (default is 0).
///
void EnableCustomDevice(const std::string& device_type, int device_id = 0);
///
/// \brief Turn on ONNXRuntime.
///
void EnableONNXRuntime();
......@@ -366,6 +374,11 @@ struct PD_INFER_DECL AnalysisConfig {
/// \return bool Whether the IPU is turned on.
///
bool use_ipu() const { return use_ipu_; }
///
/// \brief A boolean state telling whether the CustomDevice is turned on.
///        The flag is false by default and is written by
///        EnableCustomDevice().
///
/// \return bool Whether the CustomDevice is turned on.
///
bool use_custom_device() const { return use_custom_device_; }
///
/// \brief A boolean state telling whether the ONNXRuntime is turned on.
///
......@@ -403,6 +416,17 @@ struct PD_INFER_DECL AnalysisConfig {
///
int ipu_device_num() const { return ipu_device_num_; }
///
/// \brief Get the custom device id.
///        The value is 0 by default; EnableCustomDevice() records the
///        requested id when CustomDevice support is compiled in.
///
/// \return int The custom device id.
///
int custom_device_id() const { return custom_device_id_; }
///
/// \brief Get the custom device type.
///        The stored type name is returned by value (a copy); it is an
///        empty string until EnableCustomDevice() records one.
///
/// \return string The custom device type.
///
std::string custom_device_type() const { return custom_device_type_; }
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \return int The initial size in MB of the GPU memory pool.
......@@ -900,6 +924,11 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_npu_{false};
int npu_device_id_{0};
// CustomDevice related
bool use_custom_device_{false};
int custom_device_id_{0};
std::string custom_device_type_;
// ONNXRuntime related
bool use_onnxruntime_{false};
bool enable_ort_optimization_{false};
......
......@@ -166,6 +166,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \return A bool variable implying whether we are in ipu mode.
bool use_ipu() const { return use_ipu_; }
/// \brief Check if we are using CustomDevice.
/// AnalysisConfig::Update() compares this flag with the config's own
/// use_custom_device() to decide whether the pass builder must be rebuilt.
/// \return A bool variable implying whether we are in CustomDevice mode.
bool use_custom_device() const { return use_custom_device_; }
/// \brief Default destructor.
virtual ~PassStrategy() = default;
......@@ -177,6 +181,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
bool use_ipu_{false};
bool use_mkldnn_{false};
bool use_gpu_fp16_{false};
bool use_custom_device_{false};
/// \endcond
};
......@@ -291,6 +296,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
}
};
/// \class CustomDevicePassStrategy
/// \brief Pass controller used by AnalysisPredictor when running in
/// CustomDevice mode. Every instance reports use_custom_device() == true.
class PD_INFER_DECL CustomDevicePassStrategy final : public PassStrategy {
 public:
  /// \brief Create a strategy from an empty pass list.
  CustomDevicePassStrategy() : PassStrategy({}) {
    this->use_custom_device_ = true;
  }

  /// \brief Create a strategy that carries the same passes as another one.
  /// \param[in] other The CustomDevicePassStrategy object we want to copy.
  explicit CustomDevicePassStrategy(const CustomDevicePassStrategy &other)
      : PassStrategy(other.AllPasses()) {
    // The mode flag is set explicitly rather than copied from `other`.
    this->use_custom_device_ = true;
  }
};
/// \class IpuPassStrategy
/// \brief The IPU passes controller, it is used in AnalysisPredictor with IPU
/// mode.
......
......@@ -54,7 +54,7 @@ enum DataType {
// TODO(Superjomn) support more data types if needed.
};
enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU };
/// \brief Device kinds a tensor can be placed on.
/// kCUSTOM is used as a base value: the predictor encodes a custom device as
/// kCUSTOM plus that device type's global id, and the tensor copy code
/// recovers the id by subtracting kCUSTOM again.
/// NOTE(review): an enumerator added after kCUSTOM would presumably collide
/// with those encoded values; confirm before extending this enum.
enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
/// \brief Data layout tags. kUNK is pinned to -1; the remaining enumerators
/// count up from 0 (kAny = 0, kNHWC = 1, kNCHW = 2).
enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
......
{
global:
*paddle*;
*Pass*;
*profile*;
*phi*;
*FLAGS_*;
local:
*;
};
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册