diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 3aa47c9e092f7a9327ff25b818a002003396cae6..7b17899d3da2383d7aef0593cd7f40c529afbdaf 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -167,7 +167,7 @@ endif() if (WITH_ASCEND_CL) cc_test(range_op_npu_test SRCS range_op_npu_test.cc DEPS op_registry range_op scope device_context enforce executor) - cc_test(expand_op_npu_test SRCS expand_op_npu_test.cc DEPS op_registry expand_op scope device_context enforce executor compare_op) + cc_test(expand_op_npu_test SRCS expand_op_npu_test.cc DEPS op_registry expand_op eigen_cc_function scope device_context enforce executor compare_op) endif() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") diff --git a/paddle/fluid/platform/npu_info.cc b/paddle/fluid/platform/npu_info.cc index 3814faa7662fc556bc84d61802772b7e7db7ad74..bb36eedb83238149cc38bdfc8e2033834140c7d1 100644 --- a/paddle/fluid/platform/npu_info.cc +++ b/paddle/fluid/platform/npu_info.cc @@ -190,6 +190,8 @@ void NPUMemcpySync(void *dst, const void *src, size_t count, enum aclrtMemcpyKind kind, size_t dst_max_count) { // NOTE(zhiqiu): The default max_count is count dst_max_count = dst_max_count ? dst_max_count : count; + VLOG(4) << dst << " " << dst_max_count << " " << src << " " << count << " " + << kind; PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemcpy(dst, dst_max_count, src, count, kind)); } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 038bcc7f850990feaf4ce8f2df41f2b39dd247df..f315323ed592efc396ae83060dacdf34ce8ac73e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -2178,9 +2178,7 @@ All parameter, weight, gradient are variables in Paddle. #ifdef PADDLE_WITH_ASCEND_CL m.def("get_npu_device_count", platform::GetNPUDeviceCount); - m.def("_npu_finalize", []() { - platform::AclInstance::Instance().Finalize(); - }); // private interface + m.def("npu_finalize", []() { platform::AclInstance::Instance().Finalize(); }); py::class_(m, "NPUProfConfigWrapper"); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 6dd1478dc1f45fb388fbc4ca978db30522e058d4..439b5c64615c06827f77cee1d9fd85ca566eebdd 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -15,6 +15,7 @@ from __future__ import print_function import os import sys +import atexit # The legacy core need to be removed before "import core", # in case of users installing paddlepadde without -U option @@ -255,3 +256,8 @@ def __bootstrap__(): monkey_patch_variable() __bootstrap__() monkey_patch_varbase() + +# NOTE(zhiqiu): register npu_finalize on the exit of Python, +# do some clean up manually. +if core.is_compiled_with_npu(): + atexit.register(core.npu_finalize)