diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py index a980024a615620d74c3e686d56fc475dccd41a2f..cee58727ef11296eeb4920c03a81140f6f168902 100644 --- a/imperative/python/megengine/__init__.py +++ b/imperative/python/megengine/__init__.py @@ -76,7 +76,7 @@ if sys.platform == "win32": kernel32.SetErrorMode(old_error_mode) from .core._imperative_rt.core2 import close as _close -from .core._imperative_rt.core2 import full_sync as sync +from .core._imperative_rt.core2 import full_sync as _full_sync from .core._imperative_rt.core2 import sync as _sync from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func from .config import * diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py index e5f92b993b4713891c09cfe6f06db65bc7689b80..9ba1c4d640f87ff76be2263048860dfd5ee4a018 100644 --- a/imperative/python/megengine/device.py +++ b/imperative/python/megengine/device.py @@ -165,7 +165,7 @@ def get_cuda_compute_capability(device: int, device_type=DeviceType.CUDA) -> int def get_allocated_memory(device: Optional[str] = None): r"""Returns the current memory occupied by tensors on the computing device in bytes. - Due to the asynchronous execution of MegEngine, please call megengine.sync + Due to the asynchronous execution of MegEngine, please call megengine._full_sync before calling this function in order to get accurate value. """ if device is None: @@ -176,7 +176,7 @@ def get_allocated_memory(device: Optional[str] = None): def get_reserved_memory(device: Optional[str] = None): r"""Returns the current memory managed by the caching allocator on the computing device in bytes. - Due to the asynchronous execution of MegEngine, please call megengine.sync + Due to the asynchronous execution of MegEngine, please call megengine._full_sync before calling this function in order to get accurate value. """ if device is None: @@ -187,7 +187,7 @@ def get_reserved_memory(device: Optional[str] = None): def get_max_reserved_memory(device: Optional[str] = None): r"""Returns the maximum memory managed by the caching allocator on the computing device in bytes. - Due to the asynchronous execution of MegEngine, please call megengine.sync + Due to the asynchronous execution of MegEngine, please call megengine._full_sync before calling this function in order to get accurate value. """ if device is None: @@ -198,7 +198,7 @@ def get_max_reserved_memory(device: Optional[str] = None): def get_max_allocated_memory(device: Optional[str] = None): r"""Returns the maximum memory occupied by tensors on the computing device in bytes. - Due to the asynchronous execution of MegEngine, please call megengine.sync + Due to the asynchronous execution of MegEngine, please call megengine._full_sync before calling this function in order to get accurate value. """ if device is None: @@ -209,7 +209,7 @@ def get_max_allocated_memory(device: Optional[str] = None): def reset_max_memory_stats(device: Optional[str] = None): r"""Resets the maximum stats on the computing device. - Due to the asynchronous execution of MegEngine, please call megengine.sync + Due to the asynchronous execution of MegEngine, please call megengine._full_sync before calling this function in order to properly reset memory stats. """ if device is None: @@ -255,7 +255,7 @@ def coalesce_free_memory(): small pieces may not be returned. because of the async processing of megengine, the effect of this func may not be reflected - immediately. if you want to see the effect immediately, you can call megengine.sync after + immediately. if you want to see the effect immediately, you can call megengine._full_sync after this func was called .. note:: diff --git a/imperative/python/megengine/tools/benchmark_op.py b/imperative/python/megengine/tools/benchmark_op.py index e178dddfde50ed0af41bfeca66ff6f7753108c39..19bf0ff9e3d0df8a418cd0b3beaa09fff8be8daf 100644 --- a/imperative/python/megengine/tools/benchmark_op.py +++ b/imperative/python/megengine/tools/benchmark_op.py @@ -400,14 +400,14 @@ test_cases = [ def perf_func(func, inps, reps, unpack_inps, is_mge): if is_mge: - mge.sync() + mge._full_sync() tik = time.time() for _ in range(reps): if unpack_inps: out = func(*inps) else: out = func(inps) - mge.sync() + mge._full_sync() else: torch.cuda.synchronize() with torch.no_grad():