Unverified commit 6aea6be2, authored by houj04, committed by GitHub

[NPU] support dygraph execution on NPU place (#33579)

* In an NPU environment, fall back to CPUPlace for operators that have no NPU kernel.

* Fix a TensorCopy bug and add a unit test.

* Fix code style.

* Add more unit tests.
Parent 049dd853
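Taken together, the hunks below wire NPUPlace through dygraph: tensor copy, kernel dispatch with CPU fallback, garbage collection, and the Python bindings. A minimal usage sketch, assuming a build compiled with PADDLE_WITH_ASCEND_CL and one visible Ascend device:

```python
# Minimal sketch, assuming a Paddle build compiled with PADDLE_WITH_ASCEND_CL
# and at least one Ascend device visible; matmul here is illustrative.
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    with paddle.fluid.dygraph.guard(core.NPUPlace(0)):
        x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
        y = paddle.matmul(x, x)   # runs the NPU kernel when one is registered
        print(y.numpy())
```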
```diff
@@ -278,7 +278,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 Tensor* dst) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   const platform::DeviceContext* dev_ctx;
-  if (platform::is_gpu_place(dst_place)) {
+  if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
     dev_ctx = pool.Get(dst_place);
   } else {
     dev_ctx = pool.Get(src.place());
```
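The TensorCopy fix mirrors the GPU branch: when the destination is an NPU place, the copy must run on the destination's device context rather than the source's (previously only GPU destinations were handled, so a host-to-NPU copy picked the CPU context). A hedged round-trip check at the Python level, using the `_copy_to` binding added later in this diff:

```python
# Hedged round-trip through TensorCopy (assumes an NPU build); `_copy_to`
# is the VarBase binding shown further down in this diff.
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])      # on the default place
    x_npu = x._copy_to(core.NPUPlace(0), True)          # blocking H2D copy
    x_back = x_npu._copy_to(paddle.CPUPlace(), True)    # blocking D2H copy
    assert (x.numpy() == x_back.numpy()).all()
```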
```diff
@@ -131,6 +131,13 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
     expected_kernel_key.place_ = platform::CPUPlace();
     kernel_iter = kernels.find(expected_kernel_key);
   }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+  if (kernel_iter == kernels.end() &&
+      is_npu_place(expected_kernel_key.place_)) {
+    expected_kernel_key.place_ = platform::CPUPlace();
+    kernel_iter = kernels.find(expected_kernel_key);
+  }
 #endif
   // TODO(jiabin): Add operator.cc's line 1000 part back when we need that case
   PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
```
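This is the dygraph side of the commit message's first bullet: if kernel lookup fails and the expected kernel key points at an NPU place, `PrepareImpl` rewrites the key to CPUPlace and retries, so operators without NPU kernels run on the CPU instead of erroring out. An illustrative sketch (which ops actually lack NPU kernels depends on the build):

```python
# Illustrative sketch: under an NPU place, an op with no registered NPU
# kernel is re-dispatched to its CPU kernel instead of raising an error.
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    with paddle.fluid.dygraph.guard(core.NPUPlace(0)):
        x = paddle.to_tensor([3.0, 1.0, 2.0])
        # Even if, say, argsort has no NPU kernel in this build, PrepareImpl
        # falls back to the CPUPlace kernel and execution proceeds.
        y = paddle.argsort(x)
        print(y.numpy())
```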
```diff
@@ -120,6 +120,17 @@ paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
       gc.reset(new framework::CPUGarbageCollector(
           BOOST_GET_CONST(platform::CPUPlace, place), 0));
       VLOG(10) << "Created GarbageCollector at " << place;
+    } else if (platform::is_npu_place(place)) {
+#if defined(PADDLE_WITH_ASCEND_CL)
+      // TODO(zhiqiu): fix bugs and enable NPUDefaultStreamGarbageCollector.
+      gc.reset(new framework::NPUUnsafeFastGarbageCollector(
+          BOOST_GET_CONST(platform::NPUPlace, place), 0));
+      VLOG(10) << "Created GarbageCollector at " << place;
+#else
+      PADDLE_THROW(platform::errors::PermissionDenied(
+          "Paddle can't use NPU device since it's not compiled with NPU, "
+          "please recompile or reinstall Paddle with NPU support."));
+#endif
     } else {
       PADDLE_THROW(platform::errors::PreconditionNotMet(
           "Unsupported place for garbage collection"));
```
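The tracer now registers an NPUUnsafeFastGarbageCollector for NPU places (the stream-ordered default collector stays disabled per the TODO), so temporaries created while tracing on an NPU are reclaimed. A rough sketch of the kind of workload this serves, assuming an NPU build:

```python
# Rough sketch (assumes an NPU build): intermediate buffers created in a
# dygraph loop on the NPU are reclaimed by the collector registered above
# rather than accumulating.
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    with paddle.fluid.dygraph.guard(core.NPUPlace(0)):
        x = paddle.to_tensor([1.0] * 1024)
        for _ in range(100):
            x = x * 1.01   # each iteration drops the previous buffer
        print(float(x.sum()))
```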
```diff
@@ -135,12 +135,14 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
     return place_obj.cast<platform::XPUPlace>();
   } else if (py::isinstance<platform::CUDAPinnedPlace>(place_obj)) {
     return place_obj.cast<platform::CUDAPinnedPlace>();
+  } else if (py::isinstance<platform::NPUPlace>(place_obj)) {
+    return place_obj.cast<platform::NPUPlace>();
   } else if (py::isinstance<platform::Place>(place_obj)) {
     return place_obj.cast<platform::Place>();
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "Place should be one of "
-        "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace"));
+        "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
   }
 }
```
```diff
@@ -172,9 +174,13 @@ static void InitTensorForVarBase(imperative::VarBase *self,
     SetTensorFromPyArray<platform::CUDAPinnedPlace>(
         tensor, array, BOOST_GET_CONST(platform::CUDAPinnedPlace, place),
         zero_copy);
+  } else if (platform::is_npu_place(place)) {
+    SetTensorFromPyArray<platform::NPUPlace>(
+        tensor, array, BOOST_GET_CONST(platform::NPUPlace, place), zero_copy);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
-        "Place should be one of CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace"));
+        "Place should be one of "
+        "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
   }
   if (stop_gradient != -1) {
     self->SetOverridedStopGradient(stop_gradient);
```
```diff
@@ -718,6 +724,10 @@ void BindImperative(py::module *m_ptr) {
            py::arg("value"), py::arg("place"), py::arg("persistable") = false,
            py::arg("zero_copy") = false, py::arg("name") = "",
            py::arg("stop_gradient") = -1)
+      .def("__init__", &InitVarBaseFromNumpyWithArg<platform::NPUPlace>,
+           py::arg("value"), py::arg("place"), py::arg("persistable") = false,
+           py::arg("zero_copy") = false, py::arg("name") = "",
+           py::arg("stop_gradient") = -1)
       .def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value"))
       .def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor"))
       .def("__init__", &InitVarBaseFromNumpyWithKwargs)
```
```diff
@@ -1452,6 +1462,16 @@ void BindImperative(py::module *m_ptr) {
             return new_var;
           },
           py::return_value_policy::copy)
+      .def("_copy_to",
+           [](const std::shared_ptr<imperative::VarBase> &self,
+              const platform::NPUPlace &place, bool blocking) {
+             auto new_var = self->NewVarBase(place, blocking);
+             if (!blocking) {
+               IncreaseVarbaseReferenceCountUntilCopyComplete(self, place);
+             }
+             return new_var;
+           },
+           py::return_value_policy::copy)
       .def("_copy_to",
           [](const std::shared_ptr<imperative::VarBase> &self,
              const platform::Place &place, bool blocking) {
```
```diff
@@ -1578,6 +1598,11 @@ void BindImperative(py::module *m_ptr) {
             self.SetExpectedPlace(*p);
             VLOG(4) << "Tracer(" << &self << ")"
                     << " set expected place " << *p;
+          } else if (py::isinstance<platform::NPUPlace>(obj)) {
+            auto p = obj.cast<platform::NPUPlace *>();
+            self.SetExpectedPlace(*p);
+            VLOG(4) << "Tracer(" << &self << ")"
+                    << " set expected place " << *p;
           } else if (py::isinstance<platform::Place>(obj)) {
             auto p = obj.cast<platform::Place *>();
             self.SetExpectedPlace(*p);
```
```diff
@@ -1586,7 +1611,7 @@ void BindImperative(py::module *m_ptr) {
           } else {
             PADDLE_THROW(platform::errors::InvalidArgument(
                 "Incompatible Place Type: supports XPUPlace, CUDAPlace, "
-                "CPUPlace, "
+                "CPUPlace, NPUPlace, "
                 "and CUDAPinnedPlace, "
                 "but got Unknown Type!"));
           }
```
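On the Python side, this setter is what the dygraph guard goes through, so an NPU place can now be installed as the tracer's expected place:

```python
# The dygraph guard routes through the expected-place setter above;
# core.NPUPlace is now a valid value (assumes an NPU build).
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    with paddle.fluid.dygraph.guard(core.NPUPlace(0)):
        print(paddle.fluid.framework._current_expected_place())  # NPUPlace(0)
```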
```diff
@@ -1647,6 +1672,19 @@ void BindImperative(py::module *m_ptr) {
                    std::move(attrs), place, trace_backward);
              }
            })
+      .def("trace",
+           [](imperative::Tracer &self, const std::string &type,
+              const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
+              framework::AttributeMap attrs, const platform::NPUPlace &place,
+              bool trace_backward) {
+             auto ins_map = ConvertToNameVarBaseMap(ins);
+             auto outs_map = ConvertToNameVarBaseMap(outs);
+             {
+               py::gil_scoped_release release;
+               self.TraceOp(type, std::move(ins_map), std::move(outs_map),
+                            std::move(attrs), place, trace_backward);
+             }
+           })
       .def("trace",
            [](imperative::Tracer &self, const std::string &type,
               const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
```
```diff
@@ -1704,6 +1742,7 @@ void BindImperative(py::module *m_ptr) {
   m.def("varbase_copy", &VarBaseCopy<platform::CUDAPlace>);
   m.def("varbase_copy", &VarBaseCopy<platform::XPUPlace>);
   m.def("varbase_copy", &VarBaseCopy<platform::CUDAPinnedPlace>);
+  m.def("varbase_copy", &VarBaseCopy<platform::NPUPlace>);
   m.def(
       "dygraph_partial_grad",
```
```diff
@@ -1804,6 +1843,12 @@ void BindImperative(py::module *m_ptr) {
               const py::args args, const py::kwargs kwargs) {
             return imperative::PyLayerApply(place, cls, args, kwargs);
           });
+  m.def("pylayer_apply",
+        [](const platform::NPUPlace &place, const py::object &cls,
+           const py::args args, const py::kwargs kwargs) {
+          return imperative::PyLayerApply(place, cls, args, kwargs);
+        });
 }
 }  // namespace pybind
```
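The extra `pylayer_apply` overload lets user-defined PyLayers run when the current place is an NPU place. A hedged sketch, assuming the `paddle.autograd.PyLayer` API of this release line:

```python
# Hedged sketch: a custom PyLayer under an NPU place (assumes an NPU build
# and the paddle.autograd.PyLayer API).
import paddle
import paddle.fluid.core as core
from paddle.autograd import PyLayer

class Double(PyLayer):
    @staticmethod
    def forward(ctx, x):
        return 2 * x

    @staticmethod
    def backward(ctx, grad):
        return 2 * grad

if core.is_compiled_with_npu():
    with paddle.fluid.dygraph.guard(core.NPUPlace(0)):
        x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
        y = Double.apply(x)
        y.sum().backward()
        print(x.grad)   # [2.0, 2.0]
```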
```diff
@@ -246,6 +246,9 @@ class TestVarBase(unittest.TestCase):
             _test_place("gpu_pinned")
             _test_place(core.CUDAPlace(0))
             _test_place("gpu:0")
+        if core.is_compiled_with_npu():
+            _test_place(core.NPUPlace(0))
+            _test_place("npu:0")
 
     def test_to_tensor_not_change_input_stop_gradient(self):
         with paddle.fluid.dygraph.guard(core.CPUPlace()):
```
```diff
@@ -102,11 +102,10 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
     place = _get_paddle_place(place)
     if place is None:
         place = _current_expected_place()
-    elif not isinstance(
-            place,
-            (core.Place, core.CPUPlace, core.CUDAPinnedPlace, core.CUDAPlace)):
+    elif not isinstance(place, (core.Place, core.CPUPlace, core.CUDAPinnedPlace,
+                                core.CUDAPlace, core.NPUPlace)):
         raise ValueError(
-            "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace"
+            "'place' must be any of paddle.Place, paddle.CPUPlace, paddle.CUDAPinnedPlace, paddle.CUDAPlace, paddle.NPUPlace"
         )
 
     #Todo(zhouwei): Support allocate tensor on any other specified card
```
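Grounded in the unit test above, both the place object and the "npu:0" string (parsed by `_get_paddle_place`) are now valid `place` arguments; anything else still raises the ValueError shown in the diff:

```python
# Both spellings of the NPU place are accepted by to_tensor in an NPU build;
# other objects still trip the isinstance check above.
import paddle
import paddle.fluid.core as core

if core.is_compiled_with_npu():
    a = paddle.to_tensor([1.0, 2.0], place=core.NPUPlace(0))
    b = paddle.to_tensor([1.0, 2.0], place="npu:0")   # via _get_paddle_place
    print(a.place, b.place)
```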