未验证 提交 6aea6be2 编写于 作者: H houj04 提交者: GitHub

[NPU] support dygraph execution on npu place (#33579)

* in NPU environment, use CPUPlace for missing operators.

* in NPU environment, use CPUPlace for missing operators.

* fix TensorCopy bug and add unit test.

* fix code style.

* add more unit tests.
上级 049dd853
......@@ -278,7 +278,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(dst_place)) {
if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
dev_ctx = pool.Get(dst_place);
} else {
dev_ctx = pool.Get(src.place());
......
......@@ -131,6 +131,13 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
if (kernel_iter == kernels.end() &&
is_npu_place(expected_kernel_key.place_)) {
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
// TODO(jiabin): Add operator.cc's line 1000 part back when we need that case
PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
......
......@@ -120,6 +120,17 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
gc.reset(new framework::CPUGarbageCollector(
BOOST_GET_CONST(platform::CPUPlace, place), 0));
VLOG(10) << "Created GarbageCollector at " << place;
} else if (platform::is_npu_place(place)) {
#if defined(PADDLE_WITH_ASCEND_CL)
// TODO(zhiqiu): fix bugs and enable NPUDefaultStreamGarbageCollector.
gc.reset(new framework::NPUUnsafeFastGarbageCollector(
BOOST_GET_CONST(platform::NPUPlace, place), 0));
VLOG(10) << "Created GarbageCollector at " << place;
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"Paddle can't use NPU device since it's not compiled with NPU,"
"Please recompile or reinstall Paddle with NPU support."));
#endif
} else {
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Unsupported place for garbage collection"));
......
......@@ -135,12 +135,14 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
return place_obj.cast<platform::XPUPlace>();
} else if (py::isinstance<platform::CUDAPinnedPlace>(place_obj)) {
return place_obj.cast<platform::CUDAPinnedPlace>();
} else if (py::isinstance<platform::NPUPlace>(place_obj)) {
return place_obj.cast<platform::NPUPlace>();
} else if (py::isinstance<platform::Place>(place_obj)) {
return place_obj.cast<platform::Place>();
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace"));
"Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
}
}
......@@ -172,9 +174,13 @@ static void InitTensorForVarBase(imperative::VarBase *self,
SetTensorFromPyArray<platform::CUDAPinnedPlace>(
tensor, array, BOOST_GET_CONST(platform::CUDAPinnedPlace, place),
zero_copy);
} else if (platform::is_npu_place(place)) {
SetTensorFromPyArray<platform::NPUPlace>(
tensor, array, BOOST_GET_CONST(platform::NPUPlace, place), zero_copy);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace"));
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
}
if (stop_gradient != -1) {
self->SetOverridedStopGradient(stop_gradient);
......@@ -718,6 +724,10 @@ void BindImperative(py::module *m_ptr) {
py::arg("value"), py::arg("place"), py::arg("persistable") = false,
py::arg("zero_copy") = false, py::arg("name") = "",
py::arg("stop_gradient") = -1)
.def("__init__", &InitVarBaseFromNumpyWithArg<platform::NPUPlace>,
py::arg("value"), py::arg("place"), py::arg("persistable") = false,
py::arg("zero_copy") = false, py::arg("name") = "",
py::arg("stop_gradient") = -1)
.def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value"))
.def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor"))
.def("__init__", &InitVarBaseFromNumpyWithKwargs)
......@@ -1452,6 +1462,16 @@ void BindImperative(py::module *m_ptr) {
return new_var;
},
py::return_value_policy::copy)
.def("_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self,
const platform::NPUPlace &place, bool blocking) {
auto new_var = self->NewVarBase(place, blocking);
if (!blocking) {
IncreaseVarbaseReferenceCountUntilCopyComplete(self, place);
}
return new_var;
},
py::return_value_policy::copy)
.def("_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self,
const platform::Place &place, bool blocking) {
......@@ -1578,6 +1598,11 @@ void BindImperative(py::module *m_ptr) {
self.SetExpectedPlace(*p);
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else if (py::isinstance<platform::NPUPlace>(obj)) {
auto p = obj.cast<platform::NPUPlace *>();
self.SetExpectedPlace(*p);
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else if (py::isinstance<platform::Place>(obj)) {
auto p = obj.cast<platform::Place *>();
self.SetExpectedPlace(*p);
......@@ -1586,7 +1611,7 @@ void BindImperative(py::module *m_ptr) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Incompatible Place Type: supports XPUPlace, CUDAPlace, "
"CPUPlace, "
"CPUPlace, NPUPlace"
"and CUDAPinnedPlace, "
"but got Unknown Type!"));
}
......@@ -1647,6 +1672,19 @@ void BindImperative(py::module *m_ptr) {
std::move(attrs), place, trace_backward);
}
})
.def("trace",
[](imperative::Tracer &self, const std::string &type,
const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
framework::AttributeMap attrs, const platform::NPUPlace &place,
bool trace_backward) {
auto ins_map = ConvertToNameVarBaseMap(ins);
auto outs_map = ConvertToNameVarBaseMap(outs);
{
py::gil_scoped_release release;
self.TraceOp(type, std::move(ins_map), std::move(outs_map),
std::move(attrs), place, trace_backward);
}
})
.def("trace",
[](imperative::Tracer &self, const std::string &type,
const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
......@@ -1704,6 +1742,7 @@ void BindImperative(py::module *m_ptr) {
m.def("varbase_copy", &VarBaseCopy<platform::CUDAPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::XPUPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::CUDAPinnedPlace>);
m.def("varbase_copy", &VarBaseCopy<platform::NPUPlace>);
m.def(
"dygraph_partial_grad",
......@@ -1804,6 +1843,12 @@ void BindImperative(py::module *m_ptr) {
const py::args args, const py::kwargs kwargs) {
return imperative::PyLayerApply(place, cls, args, kwargs);
});
m.def("pylayer_apply",
[](const platform::NPUPlace &place, const py::object &cls,
const py::args args, const py::kwargs kwargs) {
return imperative::PyLayerApply(place, cls, args, kwargs);
});
}
} // namespace pybind
......
......@@ -246,6 +246,9 @@ class TestVarBase(unittest.TestCase):
_test_place("gpu_pinned")
_test_place(core.CUDAPlace(0))
_test_place("gpu:0")
if core.is_compiled_with_npu():
_test_place(core.NPUPlace(0))
_test_place("npu:0")
def test_to_tensor_not_change_input_stop_gradient(self):
with paddle.fluid.dygraph.guard(core.CPUPlace()):
......
......@@ -102,11 +102,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
place = _get_paddle_place(place)
if place is None:
place = _current_expected_place()
elif not isinstance(
place,
(core.Place, core.CPUPlace, core.CUDAPinnedPlace, core.CUDAPlace)):
elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace,
core.CUDAPlace, core.NPUPlace)):
raise ValueError(
"'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace"
"'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace, paddle.NPUPlace"
)
#Todo(zhouwei): Support allocate tensor on any other specified card
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册