diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index ee4edbbaa067b1dc91bb9989dad2bdb82cefacb7..b0148e50afc5483ffd706858345bb4b3e4a964f8 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1849,19 +1849,32 @@ All parameter, weight, gradient are variables in Paddle. }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::CPUPlace &place) { self.Run(scope, place); }) + const platform::CPUPlace &place) { + pybind11::gil_scoped_release release; + self.Run(scope, place); + }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::XPUPlace &place) { self.Run(scope, place); }) + const platform::XPUPlace &place) { + pybind11::gil_scoped_release release; + self.Run(scope, place); + }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::NPUPlace &place) { self.Run(scope, place); }) + const platform::NPUPlace &place) { + pybind11::gil_scoped_release release; + self.Run(scope, place); + }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::CUDAPlace &place) { self.Run(scope, place); }) + const platform::CUDAPlace &place) { + pybind11::gil_scoped_release release; + self.Run(scope, place); + }) .def("run", [](OperatorBase &self, const Scope &scope, const platform::CUDAPinnedPlace &place) { + pybind11::gil_scoped_release release; self.Run(scope, place); }) .def("type", diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 68e6e049cdbb0cd508536741c4902143f65f8f76..c5d0afb9a1716ab711c952140f7fa6e0c5c8d62a 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -216,6 +216,7 @@ T TensorGetElement(const framework::Tensor &self, size_t offset) { PADDLE_ENFORCE_LT(offset, self.numel(), platform::errors::InvalidArgument( "The offset exceeds the size of tensor.")); + T b = static_cast<T>(0); if (platform::is_cpu_place(self.place())) { b = self.data<T>()[offset]; @@ -231,8 +232,17 @@ T 
TensorGetElement(const framework::Tensor &self, size_t offset) { auto p = BOOST_GET_CONST(platform::CUDAPlace, self.place()); paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T), nullptr); +#endif + } else if (platform::is_npu_place(self.place())) { +#if defined(PADDLE_WITH_ASCEND_CL) + const T *a = self.data<T>(); + auto p = BOOST_GET_CONST(platform::NPUPlace, self.place()); + paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T), + nullptr); #endif } + VLOG(10) << "TensorGetElement, place: " << self.place() + << ", offset: " << offset << ", element: " << b; return b; } @@ -241,6 +251,8 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { PADDLE_ENFORCE_LT(offset, self->numel(), platform::errors::InvalidArgument( "The offset exceeds the size of tensor.")); + VLOG(10) << "TensorSetElement, place: " << self->place() + << ", offset: " << offset << ", element: " << elem; if (platform::is_cpu_place(self->place())) { self->mutable_data<T>(self->place())[offset] = elem; } else if (platform::is_xpu_place(self->place())) { @@ -255,6 +267,13 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { T *a = self->mutable_data<T>(p); paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T), nullptr); +#endif + } else if (platform::is_npu_place(self->place())) { +#if defined(PADDLE_WITH_ASCEND_CL) + auto p = BOOST_GET_CONST(platform::NPUPlace, self->place()); + T *a = self->mutable_data<T>(p); + paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T), + nullptr); #endif } } @@ -676,7 +695,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, size_t numel = 1; for (int i = tensor_dims.size() - 1; i >= 0; --i) { - py_dims[i] = (size_t)tensor_dims[i]; + py_dims[i] = static_cast<size_t>(tensor_dims[i]); py_strides[i] = sizeof_dtype * numel; numel *= py_dims[i]; } diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py 
index 2161f367007c75b78b1d18ab62666b4e8e1e5def..cd0d624eb40bc2af0ca3be6e8e87ad30fc144d53 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -1491,18 +1491,9 @@ class OpTest(unittest.TestCase): if not type(output_names) is list: output_names = [output_names] - # FIXME: Replace numeric_place with place to calculate numeric_grads. - # NOTE(liym27): There is an unknown error when call op.run() on NPUPlace, which - # needs to be fixed. - if hasattr(self.__class__, - "use_npu") and self.__class__.use_npu == True: - numeric_place = paddle.CPUPlace() - else: - numeric_place = place - numeric_grads = user_defined_grads or [ get_numeric_gradient( - numeric_place, + place, self.scope, self.op, self.inputs,