Unverified commit 6d396ace, authored by 张春乔, committed by GitHub

rm npu (#53566)

Parent: 7dcf5e53
......@@ -89,11 +89,6 @@ struct DLDeviceVisitor
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
}
inline ::DLDevice operator()(const platform::NPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::NPUPlace is not supported"));
}
inline ::DLDevice operator()(const platform::NPUPinnedPlace &place) const {
PADDLE_THROW(platform::errors::Unimplemented(
"platform::NPUPinnedPlace is not supported"));
......
......@@ -55,8 +55,6 @@ static phi::Backend ConvertPlaceToBackend(const phi::Place& place) {
return phi::Backend::GPU;
case phi::AllocationType::XPU:
return phi::Backend::XPU;
case phi::AllocationType::NPU:
return phi::Backend::NPU;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Cannot convert place(%d).", static_cast<int>(place.GetType())));
......
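With the `AllocationType::NPU` case deleted, the conversion switch covers only CPU, GPU, and XPU before falling through to the error. A minimal sketch of the resulting function, assuming the CPU case that sits above this hunk's visible context (not a verbatim copy of the file, and it relies on Paddle's own headers for `PADDLE_THROW`):

```cpp
// Sketch of ConvertPlaceToBackend after this hunk is applied.
static phi::Backend ConvertPlaceToBackend(const phi::Place& place) {
  switch (place.GetType()) {
    case phi::AllocationType::CPU:   // assumed from context outside the diff
      return phi::Backend::CPU;
    case phi::AllocationType::GPU:
      return phi::Backend::GPU;
    case phi::AllocationType::XPU:
      return phi::Backend::XPU;
    default:  // NPU places now land here instead of mapping to a backend
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Cannot convert place(%d).", static_cast<int>(place.GetType())));
  }
}
```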
......@@ -374,9 +374,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
#define REGISTER_OP_NPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, NPU, ::paddle::platform::NPUPlace, __VA_ARGS__)
#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, \
customized_name, \
customized_type_value, \
......@@ -413,12 +410,6 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)
#define REGISTER_OP_NPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, NPU, ::paddle::platform::NPUPlace, DEFAULT_TYPE, \
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)
#define REGISTER_OP_IPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, IPU, ::paddle::platform::IPUPlace, DEFAULT_TYPE, \
......
......@@ -1327,8 +1327,6 @@ void ParallelExecutor::InitExecutorPrivateMemberInfo(
device_name = "CPU";
} else if (member_->use_device_ == p::kCUDA) {
device_name = "CUDA";
} else if (member_->use_device_ == p::kNPU) {
device_name = "NPU";
} else if (member_->use_device_ == p::kXPU) {
device_name = "XPU";
} else {
......
......@@ -138,8 +138,6 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
case paddle_infer::PlaceType::kGPU:
// NOTE: phi also support phi::Backend::GPUDNN.
return phi::Backend::GPU;
case paddle_infer::PlaceType::kNPU:
return phi::Backend::NPU;
case paddle_infer::PlaceType::kXPU:
return phi::Backend::XPU;
case paddle_infer::PlaceType::kCPU:
......
......@@ -82,8 +82,6 @@ bool NativePaddlePredictor::Init(
place_ = paddle::platform::CUDAPlace(config_.device);
} else if (config_.use_xpu) {
place_ = paddle::platform::XPUPlace(config_.device);
} else if (config_.use_npu) {
place_ = paddle::platform::NPUPlace(config_.device);
} else {
place_ = paddle::platform::CPUPlace();
}
......
......@@ -124,9 +124,6 @@ T *Tensor::mutable_data(PlaceType place) {
case static_cast<int>(PlaceType::kXPU): {
return tensor->mutable_data<T>(paddle::platform::XPUPlace(device_));
}
case static_cast<int>(PlaceType::kNPU): {
return tensor->mutable_data<T>(paddle::platform::NPUPlace(device_));
}
case static_cast<int>(PlaceType::kCUSTOM): {
return tensor->mutable_data<T>(
paddle::platform::CustomPlace(device_type_, device_));
......
......@@ -67,7 +67,7 @@ enum DataType {
// TODO(Inference): support more data types if needed.
};
enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kIPU, kCUSTOM };
enum class DataLayout { kUNK = -1, kAny, kNHWC, kNCHW };
......
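Worth noting: the enumerators after `kUNK = -1` are implicitly numbered, so deleting `kNPU` renumbers the tail of the enum. The `static_assert`s below are my illustration, not part of the patch:

```cpp
// After this hunk, kIPU drops from 4 to 3 and kCUSTOM from 5 to 4; any raw
// int that crossed a serialization or ABI boundary under the old numbering
// now names a different place.
enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kIPU, kCUSTOM };
static_assert(static_cast<int>(PlaceType::kIPU) == 3, "was 4 before this commit");
static_assert(static_cast<int>(PlaceType::kCUSTOM) == 4, "was 5 before this commit");
```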
......@@ -53,6 +53,3 @@ namespace plat = paddle::platform;
REGISTER_OP_WITHOUT_GRADIENT(c_sync_comm_stream,
ops::CSyncCommStreamOp,
ops::CSyncCommStreamOpMaker);
REGISTER_OP_NPU_KERNEL(c_sync_comm_stream,
ops::CSyncCommStreamKernel<float, plat::NPUPlace>);
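This deletion is the call-site counterpart of removing `REGISTER_OP_NPU_KERNEL` above: with the macro gone, every registration of this shape stops compiling. For contrast, a hypothetical registration against the surviving XPU macro (this exact op/kernel pairing is my example, not something in the tree):

```cpp
// Hypothetical illustration only -- registrations under the macros that
// remain (CPU/CUDA/XPU/IPU) are untouched by this commit.
REGISTER_OP_XPU_KERNEL(c_sync_comm_stream,
                       ops::CSyncCommStreamKernel<float, plat::XPUPlace>);
```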
......@@ -95,9 +95,6 @@ class FillConstantOp : public framework::OperatorWithKernel {
case 3:
kt.set_backend(phi::Backend::XPU);
break;
case 4:
kt.set_backend(phi::Backend::NPU);
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Could NOT determine the place of variable, place_type = %d .",
......@@ -161,8 +158,7 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
"0: CPUPlace. "
"1: CUDAPlace. "
"2: CUDAPinnedPlace. "
"3: XPUPlace. "
"4: NPUPlace. ")
"3: XPUPlace. ")
.SetDefault(-1);
AddOutput("Out",
"(Tensor) Tensor of specified shape will be filled "
......
......@@ -86,16 +86,15 @@ class MemcpyD2HOpProtoMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out",
"(phi::DenseTensor) The type of output "
"is the same as input X.");
AddAttr<int>(
"dst_place_type",
"Determine the dst place of tensor copy. "
"By Now it ONLY support XPU/NPUPlace/CUDAPlace <-> CUDAPinnedPlace/CPU"
"Other place type is Unimplemented and will cause ERROR."
"0: dst is on CPUPlace. "
"1: dst is on CUDAPinnedPlace. ");
AddAttr<int>("dst_place_type",
"Determine the dst place of tensor copy. "
"By Now it ONLY support XPU/CUDAPlace <-> CUDAPinnedPlace/CPU"
"Other place type is Unimplemented and will cause ERROR."
"0: dst is on CPUPlace. "
"1: dst is on CUDAPinnedPlace. ");
AddComment(R"DOC(
MemcpyD2H Operator.
By now, it ONLY supports the memcopy between NPUPlace/CUDAPlace <-> CUDAPinnedPlace/CPU.
By now, it ONLY supports the memcopy between CUDAPlace <-> CUDAPinnedPlace/CPU.
You would have to update it if you want other more capacities.
Out = X, when type in [phi::DenseTensor]
raise error if the type is not listed above.
......
......@@ -91,13 +91,12 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"Determine the dst place of tensor copy. "
"By Now it support:"
"0. CUDAPinnedPlace/CPU <->CUDAPlace"
"1. NPUPinnedPlace/CPU <-> NPUPlace"
"2. CPU <->XPUPlace"
"3. CPU <->IPUPlace"
"1. CPU <->XPUPlace"
"2. CPU <->IPUPlace"
"Other place type is Unimplemented and will cause ERROR.");
AddComment(R"DOC(
MemcpyD2H Operator.
By now, it ONLY supports the memcopy between CUDAPinnedPlace/CPU <-> NPUPlace/CUDAPlace.
By now, it ONLY supports the memcopy between CUDAPinnedPlace/CPU <-> CUDAPlace.
You would have to update it if you want other more capacities.
Out = X, when type in [phi::DenseTensor]
raise error if the type is not listed above.
......
......@@ -105,20 +105,17 @@ class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"is the same as input X.");
AddAttr<int>("dst_place_type",
"Determine the dst place of tensor copy. "
"By Now it ONLY support CUDAPlace <-> CUDAPinnedPlace or "
"NPUPlace <-> CPUPlace. "
"By Now it ONLY support CUDAPlace <-> CUDAPinnedPlace."
"Other place type is Unimplemented and will cause ERROR."
"0: dst is on CPUPlace. "
"1: dst is on CUDAPlace. "
"2: dst is on CUDAPinnedPlace. "
"3: dst is on XPUPlace. "
"4: dst is on NPUPlace. "
"5: dst is on NPUPinnerPlace. "
"6: dst is on CustomDevicePlace");
"4: dst is on NPUPinnerPlace. "
"5: dst is on CustomDevicePlace");
AddComment(R"DOC(
Memcpy Operator.
By now, it ONLY supports the memcopy between CUDAPinnedPlace <-> CUDAPlace or
NPUPlace <-> CPUPlace, and used as an internal op by Recompute-Offload.
By now, it ONLY supports the memcopy between CUDAPinnedPlace <-> CUDAPlace, and used as an internal op by Recompute-Offload.
You would have to update it if you want other more capacities.
Out = X, when type in [phi::DenseTensor]
......
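The same renumbering shows up here as data: `dst_place_type` codes 5 and 6 (NPUPinnerPlace, CustomDevicePlace) become 4 and 5, with code 4 (NPUPlace) deleted. A hedged sketch of the caller-side consequence; the attribute-map construction is my example, not from the patch:

```cpp
// Illustrative only: a program serialized with the old integer codes must be
// re-exported or patched, since the same int now names a different place.
paddle::framework::AttributeMap attrs;
attrs["dst_place_type"] = 5;  // CustomDevicePlace; this was 6 before the commit
```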
......@@ -106,7 +106,6 @@ DeviceType Place2DeviceType(const platform::Place& place);
constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kNPU = DeviceType::NPU;
constexpr DeviceType kIPU = DeviceType::IPU;
constexpr DeviceType kCUSTOM_DEVICE = DeviceType::CUSTOM_DEVICE;
......
......@@ -26,7 +26,6 @@
using ::paddle::platform::kCPU;
using ::paddle::platform::kCUDA;
using ::paddle::platform::kCUSTOM_DEVICE;
using ::paddle::platform::kNPU;
using ::paddle::platform::kXPU;
USE_EVENT(kCPU)
......
......@@ -102,8 +102,6 @@ Place PlaceHelper::CreatePlace(const std::string &dev_type, size_t dev_id) {
return platform::CPUPlace();
} else if (dev_type == "gpu") {
return platform::CUDAPlace(dev_id);
} else if (dev_type == "npu") {
return platform::NPUPlace(dev_id);
} else if (dev_type == "xpu") {
return platform::XPUPlace(dev_id);
} else {
......
......@@ -28,7 +28,6 @@ using Place = phi::Place;
using CPUPlace = phi::CPUPlace;
using CUDAPlace = phi::GPUPlace;
using CUDAPinnedPlace = phi::GPUPinnedPlace;
using NPUPlace = phi::NPUPlace;
using NPUPinnedPlace = phi::NPUPinnedPlace;
using XPUPlace = phi::XPUPlace;
using IPUPlace = phi::IPUPlace;
......@@ -88,11 +87,6 @@ typename Visitor::result_type VisitPlace(const Place &place,
return typename Visitor::result_type();
#endif
}
case phi::AllocationType::NPU: {
PADDLE_THROW(platform::errors::Unavailable(
"Paddle is not compiled with NPU. Cannot visit npu_pinned"));
return typename Visitor::result_type();
}
case phi::AllocationType::NPUPINNED: {
PADDLE_THROW(platform::errors::Unavailable(
"Paddle is not compiled with NPU. Cannot visit npu_pinned"));
......
......@@ -159,7 +159,7 @@ void InitTensorWithNumpyValue(TensorObject* self,
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/CustomPlace"));
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace"));
}
}
......
......@@ -108,7 +108,7 @@ void InitTensorWithNumpyValue(const py::object& array,
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/CustomPlace"));
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace"));
}
}
......
......@@ -52,7 +52,6 @@ extern PyTypeObject* g_place_pytype;
extern PyTypeObject* g_cudaplace_pytype;
extern PyTypeObject* g_cpuplace_pytype;
extern PyTypeObject* g_xpuplace_pytype;
extern PyTypeObject* g_npuplace_pytype;
extern PyTypeObject* g_cudapinnedplace_pytype;
extern PyTypeObject* g_customplace_pytype;
extern PyTypeObject* g_framework_tensor_pytype;
......@@ -529,9 +528,6 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {
} else if (PyObject_IsInstance(
obj, reinterpret_cast<PyObject*>(g_xpuplace_pytype))) {
place = ::pybind11::handle(obj).cast<platform::XPUPlace>();
} else if (PyObject_IsInstance(
obj, reinterpret_cast<PyObject*>(g_npuplace_pytype))) {
place = ::pybind11::handle(obj).cast<platform::NPUPlace>();
} else if (PyObject_IsInstance(
obj, reinterpret_cast<PyObject*>(g_cudapinnedplace_pytype))) {
place = ::pybind11::handle(obj).cast<platform::CUDAPinnedPlace>();
......@@ -542,7 +538,7 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be "
"one "
"of(Place,CUDAPlace,CPUPlace,XPUPlace,NPUPlace,CUDAPinnedPlace,"
"of(Place,CUDAPlace,CPUPlace,XPUPlace,CUDAPinnedPlace,"
"CustomPlace), "
"but got %s",
arg_pos + 1,
......
......@@ -144,8 +144,6 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
return place_obj.cast<platform::XPUPlace>();
} else if (py::isinstance<platform::CUDAPinnedPlace>(place_obj)) {
return place_obj.cast<platform::CUDAPinnedPlace>();
} else if (py::isinstance<platform::NPUPlace>(place_obj)) {
return place_obj.cast<platform::NPUPlace>();
} else if (py::isinstance<platform::IPUPlace>(place_obj)) {
return place_obj.cast<platform::IPUPlace>();
} else if (py::isinstance<platform::Place>(place_obj)) {
......@@ -155,7 +153,7 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
"Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/IPUPlace/"
"CustomPlace"));
}
}
......@@ -208,7 +206,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"));
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/IPUPlace/"));
}
self->SetDataType(framework::TransToProtoVarType(tensor->dtype()));
}
......@@ -711,14 +709,6 @@ void BindImperative(py::module *m_ptr) {
py::arg("zero_copy") = false,
py::arg("name") = "",
py::arg("stop_gradient") = -1)
.def("__init__",
&InitVarBaseFromNumpyWithArg<platform::NPUPlace>,
py::arg("value"),
py::arg("place"),
py::arg("persistable") = false,
py::arg("zero_copy") = false,
py::arg("name") = "",
py::arg("stop_gradient") = -1)
.def("__init__",
&InitVarBaseFromNumpyWithArg<platform::CustomPlace>,
py::arg("value"),
......@@ -752,11 +742,6 @@ void BindImperative(py::module *m_ptr) {
py::arg("tensor"),
py::arg("place"),
py::arg("name") = "")
.def("__init__",
&InitVarBaseFromTensorWithArg<platform::NPUPlace>,
py::arg("tensor"),
py::arg("place"),
py::arg("name") = "")
.def("__init__",
&InitVarBaseFromTensorWithArg<platform::CustomPlace>,
py::arg("tensor"),
......@@ -1877,18 +1862,6 @@ void BindImperative(py::module *m_ptr) {
return new_var;
},
py::return_value_policy::copy)
.def(
"_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self,
const platform::NPUPlace &place,
bool blocking) {
auto new_var = self->NewVarBase(place, blocking);
if (!blocking) {
IncreaseVarbaseReferenceCountUntilCopyComplete(self, place);
}
return new_var;
},
py::return_value_policy::copy)
.def(
"_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self,
......@@ -2219,11 +2192,6 @@ void BindImperative(py::module *m_ptr) {
self.SetExpectedPlace(*p);
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else if (py::isinstance<platform::NPUPlace>(obj)) {
auto p = obj.cast<platform::NPUPlace *>();
self.SetExpectedPlace(*p);
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else if (py::isinstance<platform::IPUPlace>(obj)) {
auto p = obj.cast<platform::IPUPlace *>();
self.SetExpectedPlace(*p);
......@@ -2242,7 +2210,7 @@ void BindImperative(py::module *m_ptr) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Incompatible Place Type: supports XPUPlace, CUDAPlace, "
"CPUPlace, NPUPlace, IPUPlace"
"CPUPlace, IPUPlace"
"and CUDAPinnedPlace, "
"but got Unknown Type!"));
}
......@@ -2375,28 +2343,6 @@ void BindImperative(py::module *m_ptr) {
inplace_map);
}
})
.def("trace",
[](imperative::Tracer &self,
const std::string &type,
const PyNameVarBaseMap &ins,
const PyNameVarBaseMap &outs,
framework::AttributeMap attrs,
const platform::NPUPlace &place,
bool trace_backward,
const std::map<std::string, std::string> &inplace_map = {}) {
auto ins_map = ConvertToNameVarBaseMap(ins);
auto outs_map = ConvertToNameVarBaseMap(outs);
{
py::gil_scoped_release release;
self.TraceOp<imperative::VarBase>(type,
std::move(ins_map),
std::move(outs_map),
std::move(attrs),
place,
trace_backward,
inplace_map);
}
})
.def("trace",
[](imperative::Tracer &self,
const std::string &type,
......@@ -2488,7 +2434,6 @@ void BindImperative(py::module *m_ptr) {
m.def("varbase_copy", &VarBaseCopy<platform::CUDAPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::XPUPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::CUDAPinnedPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::NPUPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::CustomPlace>);
m.def(
......
......@@ -637,7 +637,6 @@ void BindPaddlePlace(py::module *m) {
.value("CPU", PaddlePlace::kCPU)
.value("GPU", PaddlePlace::kGPU)
.value("XPU", PaddlePlace::kXPU)
.value("NPU", PaddlePlace::kNPU)
.value("CUSTOM", PaddlePlace::kCUSTOM);
}
......
......@@ -189,7 +189,6 @@ PyTypeObject *g_customplace_pytype = nullptr;
PyTypeObject *g_cudaplace_pytype = nullptr;
PyTypeObject *g_cpuplace_pytype = nullptr;
PyTypeObject *g_xpuplace_pytype = nullptr;
PyTypeObject *g_npuplace_pytype = nullptr;
PyTypeObject *g_cudapinnedplace_pytype = nullptr;
PyTypeObject *g_ipuplace_pytype = nullptr;
......@@ -366,7 +365,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPlace, platform::CUDAPinnedPlace>)
.def("_get_device_id",
......@@ -495,7 +493,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
.def("_type", &PlaceIndex<platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::Place>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::CPUPlace>)
.def("_equals",
......@@ -548,8 +545,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
&IsSamePlace<platform::CUDAPinnedPlace, platform::CUDAPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::XPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::CPUPlace>)
.def("_equals",
......@@ -557,30 +552,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
.def("__repr__", string::to_string<const platform::CUDAPinnedPlace &>)
.def("__str__", string::to_string<const platform::CUDAPinnedPlace &>);
// NPUPlace
py::class_<platform::NPUPlace> npuplace(m, "NPUPlace", R"DOC(
NPUPlace is a descriptor of a device.
It represents a NPU device on which a tensor will be allocated and a model will run.
Examples:
.. code-block:: python
# required: npu
import paddle
place = paddle.NPUPlace(0)
)DOC");
g_npuplace_pytype = reinterpret_cast<PyTypeObject *>(npuplace.ptr());
npuplace.def("__init__", [](platform::NPUPlace &self, int dev_id) {})
.def("_type", &PlaceIndex<platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::Place>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::NPUPlace, platform::CUDAPinnedPlace>)
.def("get_device_id",
[](const platform::NPUPlace &self) { return self.GetDeviceId(); })
.def("__str__", string::to_string<const platform::NPUPlace &>);
// IPUPlace
py::class_<platform::IPUPlace> ipuplace(m, "IPUPlace", R"DOC(
IPUPlace is a descriptor of a device.
......@@ -625,7 +596,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
.def("_equals", &IsSamePlace<platform::IPUPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::IPUPlace, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::IPUPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::IPUPlace, platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::IPUPlace, platform::IPUPlace>)
.def("_equals",
&IsSamePlace<platform::IPUPlace, platform::CUDAPinnedPlace>)
......@@ -639,7 +609,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
.def("_equals", &IsSamePlace<platform::Place, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::IPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::CUDAPinnedPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::CustomPlace>)
......@@ -685,10 +654,6 @@ void BindPlace(pybind11::module &m) { // NOLINT
const platform::CUDAPinnedPlace &cuda_pinned_place) {
self = cuda_pinned_place;
})
.def("set_place",
[](platform::Place &self, const platform::NPUPlace &npu_place) {
self = npu_place;
})
.def("set_place",
[](platform::Place &self, const platform::IPUPlace &ipu_place) {
self = ipu_place;
......
......@@ -245,10 +245,6 @@ void BindTensor(pybind11::module &m) { // NOLINT
[](phi::DenseTensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<float>(place);
})
.def("_alloc_float",
[](phi::DenseTensor &self, paddle::platform::NPUPlace &place) {
self.mutable_data<float>(place);
})
.def("_alloc_double",
[](phi::DenseTensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<double>(place);
......@@ -315,13 +311,6 @@ void BindTensor(pybind11::module &m) { // NOLINT
self.mutable_data(place, framework::TransToPhiDataType(type)));
})
.def("_clear", &phi::DenseTensor::clear)
.def("_mutable_data",
[](phi::DenseTensor &self,
paddle::platform::NPUPlace &place,
paddle::framework::proto::VarType::Type type) {
return reinterpret_cast<uintptr_t>(
self.mutable_data(place, framework::TransToPhiDataType(type)));
})
.def("_copy_from",
&TensorCopyFrom<paddle::platform::CPUPlace>,
py::arg("tensor"),
......@@ -342,11 +331,6 @@ void BindTensor(pybind11::module &m) { // NOLINT
py::arg("tensor"),
py::arg("place"),
py::arg("batch_size") = -1)
.def("_copy_from",
&TensorCopyFrom<paddle::platform::NPUPlace>,
py::arg("tensor"),
py::arg("place"),
py::arg("batch_size") = -1)
.def("_copy_from",
&TensorCopyFrom<paddle::platform::CUDAPinnedPlace>,
py::arg("tensor"),
......@@ -382,11 +366,6 @@ void BindTensor(pybind11::module &m) { // NOLINT
py::arg("array"),
py::arg("place"),
py::arg("zero_copy") = false)
.def("set",
SetTensorFromPyArray<paddle::platform::NPUPlace>,
py::arg("array"),
py::arg("place"),
py::arg("zero_copy") = false)
.def("set",
SetTensorFromPyArray<paddle::platform::IPUPlace>,
py::arg("array"),
......@@ -402,7 +381,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
Args:
lod (numpy.ndarray): The data to set.
place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace): The place where the
Tensor is to be set.
zero_copy (bool, optional): Whether to share memory with the input numpy array.
This parameter only works with CPUPlace. Default: False.
......
......@@ -629,7 +629,7 @@ class PADDLE_API Tensor final {
* unified to Tensor, but Tensor itself is heterogeneous.
*
* Tensor can generally be represented by void* and size_t, place.
* This is suitable for most scenarios including CPU, GPU, HIP, NPU, etc.,
* This is suitable for most scenarios including CPU, GPU, HIP, etc.,
* but there are a few cases where this definition cannot be described,
* such as the Tensor representation in third-party lib such as Metal,
* OpenCL, etc., as well as some special Tensor implementations, including
......
......@@ -696,8 +696,6 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_device_id(r.place.GetDeviceId());
} else if (r.place.GetType() == phi::AllocationType::GPUPINNED) {
event->set_place(proto::MemEvent::CUDAPinnedPlace);
} else if (r.place.GetType() == phi::AllocationType::NPU) {
event->set_place(proto::MemEvent::NPUPlace);
} else {
PADDLE_THROW(
errors::Unimplemented("The current place is not supported."));
......
......@@ -91,9 +91,6 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::XPU:
os << "XPU";
break;
case Backend::NPU:
os << "NPU";
break;
case Backend::ONEDNN:
os << "ONEDNN";
break;
......@@ -137,8 +134,6 @@ inline Backend StringToBackend(const char* backend_cstr) {
return Backend::GPU;
} else if (s == std::string("XPU")) {
return Backend::XPU;
} else if (s == std::string("NPU")) {
return Backend::NPU;
} else if (s == std::string("OneDNN")) {
return Backend::ONEDNN;
} else if (s == std::string("GPUDNN")) {
......@@ -173,8 +168,6 @@ inline std::string BackendToString(const Backend& backend) {
return "GPU";
case Backend::XPU:
return "XPU";
case Backend::NPU:
return "NPU";
case Backend::ONEDNN:
return "ONEDNN";
case Backend::GPUDNN:
......
......@@ -35,8 +35,6 @@ const char *AllocationTypeStr(AllocationType type) {
return "gpu_pinned";
case AllocationType::XPU:
return "xpu";
case AllocationType::NPU:
return "npu";
case AllocationType::NPUPINNED:
return "npu_pinned";
case AllocationType::IPU:
......@@ -76,8 +74,6 @@ Place GetPinnedPlace(const Place &place) {
case AllocationType::GPU:
return phi::GPUPinnedPlace();
break;
case AllocationType::NPU:
return phi::NPUPinnedPlace();
default:
return place;
}
......
......@@ -163,16 +163,6 @@ class XPUPlace : public Place {
: Place(AllocationType::XPU, place.GetDeviceId()) {}
};
class NPUPlace : public Place {
public:
NPUPlace() : Place(AllocationType::NPU, 0) {}
explicit NPUPlace(int device_id) : Place(AllocationType::NPU, device_id) {}
NPUPlace(const NPUPlace&) = default;
NPUPlace(const Place& place) // NOLINT
: Place(AllocationType::NPU, place.GetDeviceId()) {}
};
class NPUPinnedPlace : public Place {
public:
NPUPinnedPlace() : Place(AllocationType::NPUPINNED) {}
......@@ -220,7 +210,6 @@ namespace experimental {
using AllocationType = phi::AllocationType;
using GPUPinnedPlace = phi::GPUPinnedPlace;
using XPUPlace = phi::XPUPlace;
using NPUPlace = phi::NPUPlace;
} // namespace experimental
using AllocationType = phi::AllocationType;
......
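Only the device place goes here: `NPUPinnedPlace` (declared just below the deleted class) and `AllocationType::NPUPINNED` survive this commit. A sketch of the compile-time effect, assuming only this patch is applied and Paddle's headers are available:

```cpp
// After this hunk (illustrative):
// phi::NPUPlace p(0);        // no longer compiles: NPUPlace is gone from phi
phi::NPUPinnedPlace pinned;   // still compiles; the pinned variant remains
```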
......@@ -37,8 +37,6 @@ Backend TransToPhiBackend(const phi::Place& place) {
return Backend::GPU;
case AllocationType::XPU:
return Backend::XPU;
case AllocationType::NPU:
return Backend::NPU;
case AllocationType::IPU:
return Backend::IPU;
case AllocationType::CUSTOM:
......
......@@ -161,13 +161,6 @@ void set_constant_with_place<phi::XPUPlace>(const phi::DeviceContext& context,
#endif
}
template <>
void set_constant_with_place<phi::NPUPlace>(const phi::DeviceContext& context,
phi::DenseTensor* tensor,
float value) {
PADDLE_THROW(phi::errors::Unimplemented("NPUPlace is not supported"));
}
template <>
void set_constant_with_place<phi::NPUPinnedPlace>(
const phi::DeviceContext& context, phi::DenseTensor* tensor, float value) {
......
......@@ -348,7 +348,7 @@ def amp_guard(
or tracer._expected_place.is_custom_place()
):
warnings.warn(
'amp_guard can only be enabled on CUDAPlace, XPUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
'amp_guard can only be enabled on CUDAPlace, XPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
% tracer._expected_place
)
enable = False
......
......@@ -24,7 +24,6 @@ from ..fluid.core import CPUPlace # noqa: F401
from ..fluid.core import IPUPlace # noqa: F401
from ..fluid.core import CUDAPlace # noqa: F401
from ..fluid.core import CUDAPinnedPlace # noqa: F401
from ..fluid.core import NPUPlace # noqa: F401
from ..fluid.core import CustomPlace # noqa: F401
from ..fluid import core # noqa: F401
......
......@@ -2214,7 +2214,7 @@ def _memcpy(input, place=None, output=None):
"""
The OP copies the :attr:`input` to the :attr:`output`.
NOTE: currently, only support CUDAPlace <-> CUDAPinnedPlace or NPUPlace <-> CPUPlace.
NOTE: currently, only support CUDAPlace <-> CUDAPinnedPlace.
Parameters:
input (Tensor): A tensor. Its data type supports float16, float32, float64, int32, int64, and bool.
......
......@@ -194,14 +194,6 @@ class Timeline:
% (k, mevent.device_id),
pid,
)
elif mevent.place == profiler_pb2.MemEvent.NPUPlace:
if (k, mevent.device_id, "NPU") not in self._mem_devices:
pid = self._allocate_pid()
self._mem_devices[(k, mevent.device_id, "NPU")] = pid
self._chrome_trace.emit_pid(
"memory usage on %s:npu:%d" % (k, mevent.device_id),
pid,
)
if (k, 0, "CPU") not in self._mem_devices:
pid = self._allocate_pid()
self._mem_devices[(k, 0, "CPU")] = pid
......@@ -259,7 +251,6 @@ class Timeline:
profiler_pb2.MemEvent.CPUPlace: "CPU",
profiler_pb2.MemEvent.CUDAPlace: "GPU",
profiler_pb2.MemEvent.CUDAPinnedPlace: "CUDAPinnedPlace",
profiler_pb2.MemEvent.NPUPlace: "NPU",
}
for k, profile_pb in self._profile_dict.items():
mem_list = []
......