Unverified commit a85e038a, authored by 张春乔 and committed by GitHub

rm is_npu_place (#53105)

Parent: f2c595a6
@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
   if (paddle::platform::is_gpu_place(place) ||
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place) ||
       paddle::platform::is_custom_place(place)) {
     // CudaPinndePlace is added for varbase created by dataloader

@@ -96,7 +96,6 @@ inline phi::DataType GetDtypeWithPlace(
   is_right_place = (paddle::platform::is_gpu_place(place) ||
                     paddle::platform::is_cuda_pinned_place(place) ||
                     paddle::platform::is_xpu_place(place) ||
-                    paddle::platform::is_npu_place(place) ||
                     paddle::platform::is_npu_pinned_place(place) ||
                     paddle::platform::is_custom_place(place));
   if (is_right_place) {

@@ -27,7 +27,6 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
   if (paddle::platform::is_gpu_place(place) ||
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place) ||
       paddle::platform::is_custom_place(place) ||
       paddle::platform::is_cpu_place(place)) {

@@ -29,7 +29,7 @@ void TransDataDevice(const phi::DenseTensor &in,
                                     "supported between CPU and CUDA."));
   // NOTE(zhiqiu): Special case for CPU->NPU, avoid stream sync.
-  if (platform::is_cpu_place(in.place()) && platform::is_npu_place(dst_place)) {
+  if (platform::is_cpu_place(in.place())) {
     paddle::framework::TensorCopy(
         in,
         dst_place,

@@ -227,8 +227,7 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
   // NOTE(winter-wang): in npu and custom device, D2H kernel is asynchronous.
   // need to explicit synchronization.
-  if ((platform::is_npu_place(place) || platform::is_custom_place(place)) &&
-      op_type == kMemcpyD2H) {
+  if ((platform::is_custom_place(place)) && op_type == kMemcpyD2H) {
     dev_ctx->Wait();
   }
@@ -419,7 +418,6 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
   if (IsSupportedHeterPlace(dst_place)) {
     op_type = kMemcpyH2D;
     int dst_place_type = platform::is_gpu_place(dst_place)    ? 0
-                         : platform::is_npu_place(dst_place)    ? 1
                          : platform::is_ipu_place(dst_place)    ? 3
                          : platform::is_xpu_place(dst_place)    ? 2
                          : platform::is_custom_place(dst_place) ? 6

@@ -146,9 +146,8 @@ bool IsGradOp(const std::string& op_name) {
 }
 bool IsSupportedHeterPlace(const phi::Place& place) {
-  return platform::is_gpu_place(place) || platform::is_npu_place(place) ||
-         platform::is_xpu_place(place) || platform::is_ipu_place(place) ||
-         platform::is_custom_place(place);
+  return platform::is_gpu_place(place) || platform::is_xpu_place(place) ||
+         platform::is_ipu_place(place) || platform::is_custom_place(place);
 }
 bool IsMemcpyD2H(const Instruction& instr) {

@@ -152,8 +152,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   // only gpu/npu need update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
-  if (platform::is_gpu_place(place_) || platform::is_npu_place(place_) ||
-      platform::is_custom_place(place_)) {
+  if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
             << ", execution stream = " << execution_stream;
     if (execution_stream != kDefaultStream) {
@@ -447,8 +446,6 @@ platform::DeviceType StreamAnalyzer::GetWaiterType(
   } else {
     if (platform::is_xpu_place(place_)) {
       return platform::kXPU;
-    } else if (platform::is_npu_place(place_)) {
-      return platform::kNPU;
     } else if (platform::is_custom_place(place_)) {
       return platform::kCUSTOM_DEVICE;
     }
@@ -464,7 +461,7 @@ DownstreamRunType StreamAnalyzer::AnalyseRunTypeForTwoInstructions(
   }
   // npu d2h kernel is asynchronous.
-  if (platform::is_npu_place(place_) || platform::is_custom_place(place_)) {
+  if (platform::is_custom_place(place_)) {
     if (interpreter::IsCpuOp(cur_instr) ||
         interpreter::IsMemcpyH2D(next_instr)) {
       return DownstreamRunType::kDirectRun;

@@ -672,7 +672,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                                    const BuildStrategy &build_strategy,
                                    ir::Graph *graph)
     : member_(new ParallelExecutorPrivate(places, scope)) {
-  PADDLE_ENFORCE_EQ(places.size() > 0 && !platform::is_npu_place(places[0]),
+  PADDLE_ENFORCE_EQ(places.size() > 0,
                     true,
                     platform::errors::Unavailable(
                         "NPU is not supported in ParallelExecutor."));

@@ -275,7 +275,7 @@ void TensorCopyImpl(const TENSOR& src,
                     TENSOR* dst) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
-  if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place) ||
+  if (platform::is_gpu_place(dst_place) ||
       platform::is_custom_place(dst_place)) {
     dev_ctx = pool.Get(dst_place);
   } else {
@@ -614,7 +614,6 @@ void TensorFromStream(std::istream& is,
     size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
     if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
         platform::is_xpu_place(dev_ctx.GetPlace()) ||
-        platform::is_npu_place(dev_ctx.GetPlace()) ||
         platform::is_custom_place(dev_ctx.GetPlace())) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
     defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE)
@@ -626,8 +625,7 @@ void TensorFromStream(std::istream& is,
       is.read(static_cast<char*>(buf), size);
       auto dst_place = dev_ctx.GetPlace();
       framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
-      if (platform::is_npu_place(dev_ctx.GetPlace()) ||
-          platform::is_custom_place(dev_ctx.GetPlace())) {
+      if (platform::is_custom_place(dev_ctx.GetPlace())) {
         dev_ctx.Wait();
       }
 #else
@@ -689,7 +687,6 @@ void TensorFromStream(std::istream& is,
     size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
     if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
         platform::is_xpu_place(dev_ctx.GetPlace()) ||
-        platform::is_npu_place(dev_ctx.GetPlace()) ||
         platform::is_custom_place(dev_ctx.GetPlace())) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
     defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE)
@@ -701,8 +698,7 @@ void TensorFromStream(std::istream& is,
       is.read(static_cast<char*>(buf), size);
       auto dst_place = dev_ctx.GetPlace();
       framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
-      if (platform::is_npu_place(dev_ctx.GetPlace()) ||
-          platform::is_custom_place(dev_ctx.GetPlace())) {
+      if (platform::is_custom_place(dev_ctx.GetPlace())) {
         dev_ctx.Wait();
       }
 #else
@@ -712,9 +708,6 @@ void TensorFromStream(std::istream& is,
   } else if (platform::is_xpu_place(dev_ctx.GetPlace())) {
     PADDLE_THROW(platform::errors::Unimplemented(
         "XPUPlace is not supported when not compiled with XPU"));
-  } else if (platform::is_npu_place(dev_ctx.GetPlace())) {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "NPUPlace is not supported when not compiled with NPU"));
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "CutomPlace is not supported when not compiled with CustomDevice"));

@@ -51,7 +51,6 @@ OpSupportedInfos(const std::string& place,
       {"GPU", &platform::is_gpu_place},
       {"CPU", &platform::is_cpu_place},
       {"XPU", &platform::is_xpu_place},
-      {"NPU", &platform::is_npu_place},
   };
   PADDLE_ENFORCE_NE(is_target_place.count(query_place),
                     0,
@@ -245,7 +244,6 @@ inline bool NeedCast(const std::shared_ptr<VarType>& var) {
       paddle::platform::is_cuda_pinned_place(place) ||
       paddle::platform::is_xpu_place(place) ||
       paddle::platform::is_custom_place(place) ||
-      paddle::platform::is_npu_place(place) ||
       paddle::platform::is_npu_pinned_place(place)) {
     // CudaPinndePlace is added for varbase created by dataloader
     if (data_type == paddle::framework::proto::VarType::FP32 ||

@@ -42,9 +42,6 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
     DivNRanks(tensor, nranks, context);
 #endif
-  } else if (platform::is_npu_place(tensor->place())) {
-    // TODO(kuizhiqing)
-    VLOG(4) << "divnrank for npu not support yet";
   } else if (platform::is_cpu_place(tensor->place())) {
     VLOG(4) << "before div 2" << *tensor;
     VLOG(4) << "NDiv for cpu devices : rank = " << nranks;

@@ -135,11 +135,6 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
   } else if (platform::is_cpu_place(place)) {
     gc.reset(new framework::CPUGarbageCollector(place, 0));
     VLOG(10) << "Created GarbageCollector at " << place;
-  } else if (platform::is_npu_place(place)) {
-    PADDLE_THROW(platform::errors::PermissionDenied(
-        "Paddle can't use NPU device since it's not compiled with NPU,"
-        "Please recompile or reinstall Paddle with NPU support."));
   } else if (platform::is_ipu_place(place)) {
 #if defined(PADDLE_WITH_IPU)
     gc.reset(new framework::IPUGarbageCollector(place, 0));
@@ -289,9 +284,6 @@ void Tracer::TraceOpImpl(const std::string& type,
     PADDLE_THROW(platform::errors::PreconditionNotMet(
         "PaddlePaddle should compile with XPU if use XPUPlace."));
 #endif
-  } else if (platform::is_npu_place(place)) {
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with NPU if use NPUPlace."));
   } else if (platform::is_custom_place(place)) {
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
     phi::DeviceManager::SetDevice(place);

@@ -1874,9 +1874,6 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
       auto xpu_place = place_;
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
-  } else if (platform::is_npu_place(place_)) {
-    auto npu_place = place_;
-    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else if (platform::is_custom_place(place_)) {
     auto custom_place = place_;
     auto paddleplace = static_cast<PaddlePlace>(
@@ -1931,9 +1928,6 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
       auto xpu_place = place_;
       res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
     }
-  } else if (platform::is_npu_place(place_)) {
-    auto npu_place = place_;
-    res->SetPlace(PaddlePlace::kNPU, npu_place.GetDeviceId());
   } else if (platform::is_custom_place(place_)) {
     auto custom_place = place_;
     auto paddleplace = static_cast<PaddlePlace>(

@@ -152,8 +152,6 @@ T *Tensor::data(PlaceType *place, int *size) const {
     *place = PlaceType::kGPU;
   } else if (paddle::platform::is_xpu_place(tensor->place())) {
     *place = PlaceType::kXPU;
-  } else if (paddle::platform::is_npu_place(tensor->place())) {
-    *place = PlaceType::kNPU;
   } else if (paddle::platform::is_custom_place(tensor->place())) {
     *place = PlaceType::kCUSTOM;
   } else {

@@ -52,7 +52,6 @@ class ArrayOp : public framework::OperatorBase {
     size_t offset;
     if (platform::is_gpu_place(i_tensor.place()) ||
         platform::is_xpu_place(i_tensor.place()) ||
-        platform::is_npu_place(i_tensor.place()) ||
         platform::is_custom_place(i_tensor.place())) {
       // FIXME: Avoid copy from GPU to CPU
       phi::DenseTensor t;

@@ -36,8 +36,7 @@ struct BeamSearchDecodeFunctor {
     tensor_on_gpu_ = false;
     tensor_on_npu_ = false;
     // First make a copy of GPU data on CPU
-    if (platform::is_gpu_place(step_ids_origin_[0].place()) ||
-        platform::is_npu_place(step_ids_origin_[0].place())) {
+    if (platform::is_gpu_place(step_ids_origin_[0].place())) {
       if (platform::is_gpu_place(step_ids_origin_[0].place())) {
         tensor_on_gpu_ = true;
       } else {
@@ -61,8 +60,7 @@ struct BeamSearchDecodeFunctor {
         step_ids_.push_back(out);
       }
     }
-    if (platform::is_gpu_place(step_scores_origin_[0].place()) ||
-        platform::is_npu_place(step_scores_origin_[0].place())) {
+    if (platform::is_gpu_place(step_scores_origin_[0].place())) {
       if (platform::is_gpu_place(step_scores_origin_[0].place())) {
         tensor_on_gpu_ = true;
       } else {

@@ -83,7 +83,6 @@ class ConditionalOp : public framework::OperatorBase {
       platform::DeviceContextPool::Instance().Get(ips[0]->place())->Wait();
       res = cpu_tensor.data<bool>()[0];
 #endif
-    } else if (platform::is_npu_place(ips[0]->place())) {
     } else if (platform::is_xpu_place(ips[0]->place())) {
 #ifdef PADDLE_WITH_XPU
       phi::DenseTensor cpu_tensor;

@@ -225,7 +225,6 @@ bool GetCondData(const phi::DenseTensor &cond) {
     return cond.data<bool>()[0];
   }
   // when platform::is_gpu_place(cond.place()) or
-  // platform::is_npu_place(cond.place()) or
   // platform::is_xpu_place(cond.place()) is true
   std::unique_ptr<phi::DenseTensor> cpu_cond{new phi::DenseTensor()};
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \

@@ -75,7 +75,6 @@ phi::KernelKey GetReduceExpectedKernelType(
   if (input_data_type == framework::proto::VarType::FP16) {
     PADDLE_ENFORCE_EQ(
         platform::is_gpu_place(ctx.GetPlace()) ||
-            platform::is_npu_place(ctx.GetPlace()) ||
            platform::is_xpu_place(ctx.GetPlace()) ||
            platform::is_custom_place(ctx.GetPlace()),
        true,

@@ -384,8 +384,6 @@ void BufferedReader::ReadNextImpl(paddle::framework::LoDTensorArray *out) {
   if (platform::is_gpu_place(place_)) {
     *out = std::move(cuda_buffer_[i]);
-  } else if (platform::is_npu_place(place_)) {
-    *out = std::move(npu_buffer_[i]);
   } else if (platform::is_xpu_place(place_)) {
     *out = std::move(xpu_buffer_[i]);
   } else if (platform::is_custom_place(place_)) {

@@ -624,7 +624,6 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
     if (input_data_type == framework::proto::VarType::FP16) {
       PADDLE_ENFORCE_EQ(
           platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_npu_place(ctx.GetPlace()) ||
              platform::is_xpu_place(ctx.GetPlace()) ||
              platform::is_custom_place(ctx.GetPlace()),
          true,

@@ -43,7 +43,6 @@ class SoftmaxOp : public framework::OperatorWithKernel {
     if (input_data_type == framework::proto::VarType::FP16) {
       PADDLE_ENFORCE_EQ(
           platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_npu_place(ctx.GetPlace()) ||
              platform::is_xpu_place(ctx.GetPlace()) ||
              platform::is_custom_place(ctx.GetPlace()),
          true,
@@ -128,7 +127,6 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
         ctx, framework::GradVarName("Out"));
     if (input_data_type == framework::proto::VarType::FP16) {
       if (!(platform::is_gpu_place(ctx.GetPlace()) ||
-            platform::is_npu_place(ctx.GetPlace()) ||
             platform::is_xpu_place(ctx.GetPlace()) ||
             platform::is_custom_place(ctx.GetPlace())))
         PADDLE_THROW(platform::errors::InvalidArgument(

@@ -29,8 +29,7 @@ inline std::vector<int> get_repeat_times(
     auto* repeat_data = repeat_tensor->data<int>();
     phi::DenseTensor cpu_repeat_tensor;
     if (platform::is_gpu_place(repeat_tensor->place()) ||
-        platform::is_xpu_place(repeat_tensor->place()) ||
-        platform::is_npu_place(repeat_tensor->place())) {
+        platform::is_xpu_place(repeat_tensor->place())) {
       paddle::framework::TensorCopySync(
           *repeat_tensor, platform::CPUPlace(), &cpu_repeat_tensor);
       repeat_data = cpu_repeat_tensor.data<int>();
@@ -48,8 +47,7 @@ inline std::vector<int> get_repeat_times(
   for (size_t i = 0; i < list_repeat_times_tensor.size(); ++i) {
     auto tensor = list_repeat_times_tensor[i];
     if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place()) ||
-        platform::is_npu_place(tensor->place())) {
+        platform::is_xpu_place(tensor->place())) {
       phi::DenseTensor temp;
       paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
       vec_repeat_times.push_back(*temp.data<int32_t>());

@@ -45,8 +45,6 @@ DeviceType Place2DeviceType(const platform::Place& place) {
     return platform::DeviceType::XPU;
   } else if (platform::is_ipu_place(place)) {
     return platform::DeviceType::IPU;
-  } else if (platform::is_npu_place(place)) {
-    return platform::DeviceType::NPU;
   } else if (platform::is_custom_place(place)) {
     return platform::DeviceType::CUSTOM_DEVICE;
   } else {

@@ -33,10 +33,6 @@ bool is_xpu_place(const Place &p) {
   return p.GetType() == phi::AllocationType::XPU;
 }
-bool is_npu_place(const Place &p) {
-  return p.GetType() == phi::AllocationType::NPU;
-}
 bool is_ipu_place(const Place &p) {
   return p.GetType() == phi::AllocationType::IPU;
 }
@@ -73,8 +69,6 @@ bool is_same_place(const Place &p1, const Place &p2) {
     return true;
   } else if (is_xpu_place(p1)) {
     return p1 == p2;
-  } else if (is_npu_place(p1)) {
-    return p1 == p2;
   } else if (is_ipu_place(p1)) {
     return p1 == p2;
   } else if (is_custom_place(p1)) {
@@ -93,8 +87,6 @@ std::string PlaceHelper::GetDeviceType(const Place &place) {
     return "cpu";
   } else if (is_gpu_place(place)) {
     return "gpu";
-  } else if (is_npu_place(place)) {
-    return "npu";
   } else if (is_xpu_place(place)) {
     return "xpu";
   } else if (is_custom_place(place)) {

@@ -47,7 +47,6 @@ class PlaceHelper {
 bool is_gpu_place(const Place &);
 bool is_xpu_place(const Place &);
-bool is_npu_place(const Place &);
 bool is_ipu_place(const Place &);
 bool is_cpu_place(const Place &);
 bool is_cuda_pinned_place(const Place &);

@@ -153,8 +153,6 @@ void InitTensorWithNumpyValue(TensorObject* self,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         impl_ptr, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {
     SetTensorFromPyArray<platform::CustomPlace>(
         impl_ptr, array, place, zero_copy);

@@ -102,8 +102,6 @@ void InitTensorWithNumpyValue(const py::object& array,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         impl_ptr, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {
     SetTensorFromPyArray<platform::CustomPlace>(
         impl_ptr, array, place, zero_copy);

@@ -199,8 +199,6 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
   } else if (platform::is_cuda_pinned_place(place)) {
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         tensor, array, place, zero_copy);
-  } else if (platform::is_npu_place(place)) {
-    SetTensorFromPyArray<platform::NPUPlace>(tensor, array, place, zero_copy);
   } else if (platform::is_ipu_place(place)) {
     SetTensorFromPyArray<platform::IPUPlace>(tensor, array, place, zero_copy);
   } else if (platform::is_custom_place(place)) {

@@ -648,8 +648,6 @@ void BindPlace(pybind11::module &m) {  // NOLINT
           [](platform::Place &self) { return platform::is_cpu_place(self); })
       .def("is_xpu_place",
           [](platform::Place &self) { return platform::is_xpu_place(self); })
-      .def("is_npu_place",
-          [](platform::Place &self) { return platform::is_npu_place(self); })
       .def("is_ipu_place",
           [](platform::Place &self) { return platform::is_ipu_place(self); })
       .def("is_cuda_pinned_place",

@@ -960,7 +960,6 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
   }
   bool is_gpu_tensor = platform::is_gpu_place(tensor.place());
   bool is_xpu_tensor = platform::is_xpu_place(tensor.place());
-  bool is_npu_tensor = platform::is_npu_place(tensor.place());
   bool is_custom_device_tensor = platform::is_custom_place(tensor.place());
   const auto &tensor_dims = tensor.dims();
   auto tensor_dtype = framework::TransToProtoVarType(tensor.dtype());
@@ -981,8 +980,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
   std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(
       framework::TransToProtoVarType(tensor.dtype()));
-  if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor &&
-      !is_custom_device_tensor) {
+  if (!is_gpu_tensor && !is_xpu_tensor && !is_custom_device_tensor) {
     if (!need_deep_copy) {
       auto base = py::cast(std::move(tensor));
       return py::array(py::dtype(py_dtype_str.c_str()),