Unverified · Commit a85e038a, authored by 张春乔, committed by GitHub

rm is_npu_place (#53105)

Parent f2c595a6
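Summary of the change: this commit deletes the `is_npu_place` predicate (the `phi::AllocationType::NPU` check) from the place helpers and removes every `NPUPlace` branch that depended on it across the executor, tensor utilities, AMP, inference, operators, and the Python bindings. The surviving branches in the diff suggest that NPU-style devices are now expected to be reached through the pluggable custom-device path (`is_custom_place`). As a minimal illustrative sketch, not code from this diff (the include path and helper name are assumptions), a dispatch that previously special-cased NPU alongside custom devices collapses to the custom-device check:

```cpp
#include "paddle/fluid/platform/place.h"  // assumed header providing the place predicates

// Sketch only: branches that used to read
//   if (platform::is_npu_place(place) || platform::is_custom_place(place)) { ... }
// keep just the custom-device predicate after this commit, since NPU support
// is expected to arrive as a plug-in custom device rather than a built-in place.
bool NeedsExplicitD2HSync(const phi::Place& place) {  // hypothetical helper name
  return paddle::platform::is_custom_place(place);
}
```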
@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
   if (paddle::platform::is_gpu_place(place) ||
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place) ||
       paddle::platform::is_custom_place(place)) {
     // CudaPinndePlace is added for varbase created by dataloader
......
@@ -96,7 +96,6 @@ inline phi::DataType GetDtypeWithPlace(
   is_right_place = (paddle::platform::is_gpu_place(place) ||
                     paddle::platform::is_cuda_pinned_place(place) ||
                     paddle::platform::is_xpu_place(place) ||
-                    paddle::platform::is_npu_place(place) ||
                     paddle::platform::is_npu_pinned_place(place) ||
                     paddle::platform::is_custom_place(place));
   if (is_right_place) {
......
@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
   if (paddle::platform::is_gpu_place(place) ||
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place) ||
       paddle::platform::is_custom_place(place) ||
       paddle::platform::is_cpu_place(place)) {
......
@@ -29,7 +29,7 @@ void TransDataDevice(const phi::DenseTensor &in,
           "supported between CPU and CUDA."));
   // NOTE(zhiqiu): Special case for CPU->NPU, avoid stream sync.
-  if (platform::is_cpu_place(in.place()) && platform::is_npu_place(dst_place)) {
+  if (platform::is_cpu_place(in.place())) {
     paddle::framework::TensorCopy(
         in,
         dst_place,
......
@@ -227,8 +227,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
   // NOTE(winter-wang): in npu and custom device, D2H kernel is asynchronous.
   // need to explicit synchronization.
-  if ((platform::is_npu_place(place) || platform::is_custom_place(place)) &&
-      op_type == kMemcpyD2H) {
+  if ((platform::is_custom_place(place)) && op_type == kMemcpyD2H) {
     dev_ctx->Wait();
   }
@@ -419,7 +418,6 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
   if (IsSupportedHeterPlace(dst_place)) {
     op_type = kMemcpyH2D;
     int dst_place_type = platform::is_gpu_place(dst_place)      ? 0
-                         : platform::is_npu_place(dst_place)    ? 1
                          : platform::is_ipu_place(dst_place)    ? 3
                          : platform::is_xpu_place(dst_place)    ? 2
                          : platform::is_custom_place(dst_place) ? 6
......
@@ -146,9 +146,8 @@ bool IsGradOp(const std::string& op_name) {
 }
 bool IsSupportedHeterPlace(const phi::Place& place) {
-  return platform::is_gpu_place(place) || platform::is_npu_place(place) ||
-         platform::is_xpu_place(place) || platform::is_ipu_place(place) ||
-         platform::is_custom_place(place);
+  return platform::is_gpu_place(place) || platform::is_xpu_place(place) ||
+         platform::is_ipu_place(place) || platform::is_custom_place(place);
 }
 bool IsMemcpyD2H(const Instruction& instr) {
......
@@ -152,8 +152,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   // only gpu/npu need update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
-  if (platform::is_gpu_place(place_) || platform::is_npu_place(place_) ||
-      platform::is_custom_place(place_)) {
+  if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
             << ", execution stream = " << execution_stream;
     if (execution_stream != kDefaultStream) {
@@ -447,8 +446,6 @@ platform::DeviceType StreamAnalyzer::GetWaiterType(
   } else {
     if (platform::is_xpu_place(place_)) {
       return platform::kXPU;
-    } else if (platform::is_npu_place(place_)) {
-      return platform::kNPU;
     } else if (platform::is_custom_place(place_)) {
       return platform::kCUSTOM_DEVICE;
     }
@@ -464,7 +461,7 @@ DownstreamRunType StreamAnalyzer::AnalyseRunTypeForTwoInstructions(
   }
   // npu d2h kernel is asynchronous.
-  if (platform::is_npu_place(place_) || platform::is_custom_place(place_)) {
+  if (platform::is_custom_place(place_)) {
     if (interpreter::IsCpuOp(cur_instr) ||
         interpreter::IsMemcpyH2D(next_instr)) {
       return DownstreamRunType::kDirectRun;
......
@@ -672,7 +672,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                                    const BuildStrategy &build_strategy,
                                    ir::Graph *graph)
     : member_(new ParallelExecutorPrivate(places, scope)) {
-  PADDLE_ENFORCE_EQ(places.size() > 0 && !platform::is_npu_place(places[0]),
+  PADDLE_ENFORCE_EQ(places.size() > 0,
                     true,
                     platform::errors::Unavailable(
                         "NPU is not supported in ParallelExecutor."));
......
@@ -275,7 +275,7 @@ void TensorCopyImpl(const TENSOR& src,
                     TENSOR* dst) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
-  if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place) ||
+  if (platform::is_gpu_place(dst_place) ||
       platform::is_custom_place(dst_place)) {
     dev_ctx = pool.Get(dst_place);
   } else {
@@ -614,7 +614,6 @@ void TensorFromStream(std::istream& is,
     size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
     if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
         platform::is_xpu_place(dev_ctx.GetPlace()) ||
-        platform::is_npu_place(dev_ctx.GetPlace()) ||
         platform::is_custom_place(dev_ctx.GetPlace())) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
     defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE)
@@ -626,8 +625,7 @@ void TensorFromStream(std::istream& is,
       is.read(static_cast<char*>(buf), size);
       auto dst_place = dev_ctx.GetPlace();
       framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
-      if (platform::is_npu_place(dev_ctx.GetPlace()) ||
-          platform::is_custom_place(dev_ctx.GetPlace())) {
+      if (platform::is_custom_place(dev_ctx.GetPlace())) {
         dev_ctx.Wait();
       }
 #else
@@ -689,7 +687,6 @@ void TensorFromStream(std::istream& is,
     size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
     if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
         platform::is_xpu_place(dev_ctx.GetPlace()) ||
-        platform::is_npu_place(dev_ctx.GetPlace()) ||
         platform::is_custom_place(dev_ctx.GetPlace())) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
     defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE)
@@ -701,8 +698,7 @@ void TensorFromStream(std::istream& is,
       is.read(static_cast<char*>(buf), size);
       auto dst_place = dev_ctx.GetPlace();
       framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
-      if (platform::is_npu_place(dev_ctx.GetPlace()) ||
-          platform::is_custom_place(dev_ctx.GetPlace())) {
+      if (platform::is_custom_place(dev_ctx.GetPlace())) {
         dev_ctx.Wait();
       }
 #else
@@ -712,9 +708,6 @@ void TensorFromStream(std::istream& is,
   } else if (platform::is_xpu_place(dev_ctx.GetPlace())) {
     PADDLE_THROW(platform::errors::Unimplemented(
         "XPUPlace is not supported when not compiled with XPU"));
-  } else if (platform::is_npu_place(dev_ctx.GetPlace())) {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "NPUPlace is not supported when not compiled with NPU"));
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "CutomPlace is not supported when not compiled with CustomDevice"));
......
@@ -51,7 +51,6 @@ OpSupportedInfos(const std::string& place,
       {"GPU", &platform::is_gpu_place},
       {"CPU", &platform::is_cpu_place},
       {"XPU", &platform::is_xpu_place},
-      {"NPU", &platform::is_npu_place},
   };
   PADDLE_ENFORCE_NE(is_target_place.count(query_place),
                     0,
@@ -245,7 +244,6 @@ inline bool NeedCast(const std::shared_ptr<VarType>& var) {
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
       paddle::platform::is_custom_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place)) {
     // CudaPinndePlace is added for varbase created by dataloader
     if (data_type == paddle::framework::proto::VarType::FP32 ||
......
@@ -42,9 +42,6 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
     DivNRanks(tensor, nranks, context);
 #endif
-  } else if (platform::is_npu_place(tensor->place())) {
-    // TODO(kuizhiqing)
-    VLOG(4) << "divnrank for npu not support yet";
   } else if (platform::is_cpu_place(tensor->place())) {
     VLOG(4) << "before div 2" << *tensor;
     VLOG(4) << "NDiv for cpu devices : rank = " << nranks;
......
@@ -135,11 +135,6 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
   } else if (platform::is_cpu_place(place)) {
     gc.reset(new framework::CPUGarbageCollector(place, 0));
     VLOG(10) << "Created GarbageCollector at " << place;
-  } else if (platform::is_npu_place(place)) {
-    PADDLE_THROW(platform::errors::PermissionDenied(
-        "Paddle can't use NPU device since it's not compiled with NPU,"
-        "Please recompile or reinstall Paddle with NPU support."));
   } else if (platform::is_ipu_place(place)) {
 #if defined(PADDLE_WITH_IPU)
     gc.reset(new framework::IPUGarbageCollector(place, 0));
@@ -289,9 +284,6 @@ void Tracer::TraceOpImpl(const std::string& type,
     PADDLE_THROW(platform::errors::PreconditionNotMet(
         "PaddlePaddle should compile with XPU if use XPUPlace."));
 #endif
-  } else if (platform::is_npu_place(place)) {
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with NPU if use NPUPlace."));
   } else if (platform::is_custom_place(place)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
     phi::DeviceManager::SetDevice(place);
......
@@ -1874,9 +1874,6 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
       auto xpu_place = place_;
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
-  } else if (platform::is_npu_place(place_)) {
-    auto npu_place = place_;
-    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else if (platform::is_custom_place(place_)) {
     auto custom_place = place_;
     auto paddleplace = static_cast<PaddlePlace>(
@@ -1931,9 +1928,6 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
       auto xpu_place = place_;
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
-  } else if (platform::is_npu_place(place_)) {
-    auto npu_place = place_;
-    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else if (platform::is_custom_place(place_)) {
     auto custom_place = place_;
     auto paddleplace = static_cast<PaddlePlace>(
......
@@ -152,8 +152,6 @@ T *Tensor::data(PlaceType *place, int *size) const {
     *place = PlaceType::kGPU;
   } else if (paddle::platform::is_xpu_place(tensor->place())) {
     *place = PlaceType::kXPU;
-  } else if (paddle::platform::is_npu_place(tensor->place())) {
-    *place = PlaceType::kNPU;
   } else if (paddle::platform::is_custom_place(tensor->place())) {
     *place = PlaceType::kCUSTOM;
   } else {
......
@@ -52,7 +52,6 @@ class ArrayOp : public framework::OperatorBase {
     size_t offset;
     if (platform::is_gpu_place(i_tensor.place()) ||
         platform::is_xpu_place(i_tensor.place()) ||
-        platform::is_npu_place(i_tensor.place()) ||
         platform::is_custom_place(i_tensor.place())) {
       // FIXME: Avoid copy from GPU to CPU
       phi::DenseTensor t;
......
@@ -36,8 +36,7 @@ struct BeamSearchDecodeFunctor {
     tensor_on_gpu_ = false;
     tensor_on_npu_ = false;
     // First make a copy of GPU data on CPU
-    if (platform::is_gpu_place(step_ids_origin_[0].place()) ||
-        platform::is_npu_place(step_ids_origin_[0].place())) {
+    if (platform::is_gpu_place(step_ids_origin_[0].place())) {
       if (platform::is_gpu_place(step_ids_origin_[0].place())) {
         tensor_on_gpu_ = true;
       } else {
@@ -61,8 +60,7 @@ struct BeamSearchDecodeFunctor {
         step_ids_.push_back(out);
       }
     }
-    if (platform::is_gpu_place(step_scores_origin_[0].place()) ||
-        platform::is_npu_place(step_scores_origin_[0].place())) {
+    if (platform::is_gpu_place(step_scores_origin_[0].place())) {
       if (platform::is_gpu_place(step_scores_origin_[0].place())) {
         tensor_on_gpu_ = true;
       } else {
......
@@ -83,7 +83,6 @@ class ConditionalOp : public framework::OperatorBase {
       platform::DeviceContextPool::Instance().Get(ips[0]->place())->Wait();
       res = cpu_tensor.data<bool>()[0];
 #endif
-    } else if (platform::is_npu_place(ips[0]->place())) {
     } else if (platform::is_xpu_place(ips[0]->place())) {
 #ifdef PADDLE_WITH_XPU
       phi::DenseTensor cpu_tensor;
......
@@ -225,7 +225,7 @@ bool GetCondData(const phi::DenseTensor &cond) {
     return cond.data<bool>()[0];
   }
   // when platform::is_gpu_place(cond.place()) or
-  // platform::is_npu_place(cond.place()) or
   // platform::is_xpu_place(cond.place()) is true
   std::unique_ptr<phi::DenseTensor> cpu_cond{new phi::DenseTensor()};
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
......
@@ -75,7 +75,6 @@ phi::KernelKey GetReduceExpectedKernelType(
   if (input_data_type == framework::proto::VarType::FP16) {
     PADDLE_ENFORCE_EQ(
         platform::is_gpu_place(ctx.GetPlace()) ||
-            platform::is_npu_place(ctx.GetPlace()) ||
             platform::is_xpu_place(ctx.GetPlace()) ||
             platform::is_custom_place(ctx.GetPlace()),
         true,
......
@@ -384,8 +384,6 @@ void BufferedReader::ReadNextImpl(paddle::framework::LoDTensorArray *out) {
   if (platform::is_gpu_place(place_)) {
     *out = std::move(cuda_buffer_[i]);
-  } else if (platform::is_npu_place(place_)) {
-    *out = std::move(npu_buffer_[i]);
   } else if (platform::is_xpu_place(place_)) {
     *out = std::move(xpu_buffer_[i]);
   } else if (platform::is_custom_place(place_)) {
......
@@ -624,7 +624,6 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
     if (input_data_type == framework::proto::VarType::FP16) {
       PADDLE_ENFORCE_EQ(
           platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_npu_place(ctx.GetPlace()) ||
               platform::is_xpu_place(ctx.GetPlace()) ||
               platform::is_custom_place(ctx.GetPlace()),
           true,
......
@@ -43,7 +43,6 @@ class SoftmaxOp : public framework::OperatorWithKernel {
     if (input_data_type == framework::proto::VarType::FP16) {
       PADDLE_ENFORCE_EQ(
           platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_npu_place(ctx.GetPlace()) ||
               platform::is_xpu_place(ctx.GetPlace()) ||
               platform::is_custom_place(ctx.GetPlace()),
           true,
@@ -128,7 +127,6 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
         ctx, framework::GradVarName("Out"));
     if (input_data_type == framework::proto::VarType::FP16) {
       if (!(platform::is_gpu_place(ctx.GetPlace()) ||
-            platform::is_npu_place(ctx.GetPlace()) ||
             platform::is_xpu_place(ctx.GetPlace()) ||
             platform::is_custom_place(ctx.GetPlace())))
         PADDLE_THROW(platform::errors::InvalidArgument(
......
@@ -29,8 +29,7 @@ inline std::vector<int> get_repeat_times(
     auto* repeat_data = repeat_tensor->data<int>();
     phi::DenseTensor cpu_repeat_tensor;
     if (platform::is_gpu_place(repeat_tensor->place()) ||
-        platform::is_xpu_place(repeat_tensor->place()) ||
-        platform::is_npu_place(repeat_tensor->place())) {
+        platform::is_xpu_place(repeat_tensor->place())) {
       paddle::framework::TensorCopySync(
           *repeat_tensor, platform::CPUPlace(), &cpu_repeat_tensor);
       repeat_data = cpu_repeat_tensor.data<int>();
@@ -48,8 +47,7 @@ inline std::vector<int> get_repeat_times(
     for (size_t i = 0; i < list_repeat_times_tensor.size(); ++i) {
       auto tensor = list_repeat_times_tensor[i];
       if (platform::is_gpu_place(tensor->place()) ||
-          platform::is_xpu_place(tensor->place()) ||
-          platform::is_npu_place(tensor->place())) {
+          platform::is_xpu_place(tensor->place())) {
         phi::DenseTensor temp;
         paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
         vec_repeat_times.push_back(*temp.data<int32_t>());
......
@@ -45,8 +45,6 @@ DeviceType Place2DeviceType(const platform::Place& place) {
     return platform::DeviceType::XPU;
   } else if (platform::is_ipu_place(place)) {
     return platform::DeviceType::IPU;
-  } else if (platform::is_npu_place(place)) {
-    return platform::DeviceType::NPU;
   } else if (platform::is_custom_place(place)) {
     return platform::DeviceType::CUSTOM_DEVICE;
   } else {
......
@@ -33,10 +33,6 @@ bool is_xpu_place(const Place &p) {
   return p.GetType() == phi::AllocationType::XPU;
 }
-bool is_npu_place(const Place &p) {
-  return p.GetType() == phi::AllocationType::NPU;
-}
 bool is_ipu_place(const Place &p) {
   return p.GetType() == phi::AllocationType::IPU;
 }
@@ -73,8 +69,6 @@ bool is_same_place(const Place &p1, const Place &p2) {
     return true;
   } else if (is_xpu_place(p1)) {
     return p1 == p2;
-  } else if (is_npu_place(p1)) {
-    return p1 == p2;
   } else if (is_ipu_place(p1)) {
     return p1 == p2;
   } else if (is_custom_place(p1)) {
@@ -93,8 +87,6 @@ std::string PlaceHelper::GetDeviceType(const Place &place) {
     return "cpu";
   } else if (is_gpu_place(place)) {
     return "gpu";
-  } else if (is_npu_place(place)) {
-    return "npu";
   } else if (is_xpu_place(place)) {
     return "xpu";
   } else if (is_custom_place(place)) {
......
@@ -47,7 +47,6 @@ class PlaceHelper {
 bool is_gpu_place(const Place &);
 bool is_xpu_place(const Place &);
-bool is_npu_place(const Place &);
 bool is_ipu_place(const Place &);
 bool is_cpu_place(const Place &);
 bool is_cuda_pinned_place(const Place &);
......
@@ -153,8 +153,6 @@ void InitTensorWithNumpyValue(TensorObject* self,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         impl_ptr, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {
     SetTensorFromPyArray<platform::CustomPlace>(
         impl_ptr, array, place, zero_copy);
......
@@ -102,8 +102,6 @@ void InitTensorWithNumpyValue(const py::object& array,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         impl_ptr, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {
     SetTensorFromPyArray<platform::CustomPlace>(
         impl_ptr, array, place, zero_copy);
......
@@ -199,8 +199,6 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         tensor, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(tensor, array, place, zero_copy);
   } else if (platform::is_ipu_place(place)) {
     SetTensorFromPyArray<platform::IPUPlace>(tensor, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {
......
@@ -648,8 +648,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
           [](platform::Place &self) { return platform::is_cpu_place(self); })
      .def("is_xpu_place",
           [](platform::Place &self) { return platform::is_xpu_place(self); })
-      .def("is_npu_place",
-           [](platform::Place &self) { return platform::is_npu_place(self); })
      .def("is_ipu_place",
           [](platform::Place &self) { return platform::is_ipu_place(self); })
      .def("is_cuda_pinned_place",
......
@@ -960,7 +960,6 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
   }
   bool is_gpu_tensor = platform::is_gpu_place(tensor.place());
   bool is_xpu_tensor = platform::is_xpu_place(tensor.place());
-  bool is_npu_tensor = platform::is_npu_place(tensor.place());
   bool is_custom_device_tensor = platform::is_custom_place(tensor.place());
   const auto &tensor_dims = tensor.dims();
   auto tensor_dtype = framework::TransToProtoVarType(tensor.dtype());
@@ -981,8 +980,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
   std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(
       framework::TransToProtoVarType(tensor.dtype()));
-  if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor &&
-      !is_custom_device_tensor) {
+  if (!is_gpu_tensor && !is_xpu_tensor && !is_custom_device_tensor) {
     if (!need_deep_copy) {
       auto base = py::cast(std::move(tensor));
       return py::array(py::dtype(py_dtype_str.c_str()),
......