From 10325a82e1032c3397b6f6611f558eb18ede0b07 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 8 Mar 2022 09:55:10 +0800 Subject: [PATCH] add python profiler package (#40065) * add python profiler package * update according to review * fix bug * fix bug * fix bug * add unit test * Revert "add unit test" This reverts commit 4e69ff71b0645e069afe5dd8fea0d07717852c48. * reduce for pr * add unit test * modify for pr * fix unittest * update for ci coverage * modify according to review * fix bug * improve coverage --- paddle/fluid/platform/profiler.cc | 4 + paddle/fluid/platform/profiler.h | 1 + paddle/fluid/pybind/CMakeLists.txt | 2 +- paddle/fluid/pybind/pybind.cc | 85 ++++ python/paddle/fluid/core.py | 2 + .../fluid/tests/unittests/test_newprofiler.py | 129 +++++ python/paddle/profiler/__init__.py | 26 + python/paddle/profiler/profiler.py | 469 ++++++++++++++++++ python/paddle/profiler/profiler_statistic.py | 31 ++ python/paddle/profiler/utils.py | 90 ++++ python/setup.py.in | 1 + 11 files changed, 839 insertions(+), 1 deletion(-) create mode 100755 python/paddle/fluid/tests/unittests/test_newprofiler.py create mode 100644 python/paddle/profiler/__init__.py create mode 100644 python/paddle/profiler/profiler.py create mode 100644 python/paddle/profiler/profiler_statistic.py create mode 100644 python/paddle/profiler/utils.py diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 866bf3c66aa..feb72bce72b 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -489,6 +489,10 @@ void NvprofDisableRecordEvent() { g_enable_nvprof_hook = false; } void EnableHostEventRecorder() { FLAGS_enable_host_event_recorder_hook = true; } +void DisableHostEventRecorder() { + FLAGS_enable_host_event_recorder_hook = false; +} + std::string PrintHostEvents() { std::ostringstream oss; auto host_evt_sec = HostEventRecorder::GetInstance().GatherEvents(); diff --git a/paddle/fluid/platform/profiler.h 
b/paddle/fluid/platform/profiler.h index 122e19b7c28..78275341cbb 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -216,6 +216,7 @@ void NvprofEnableRecordEvent(); void NvprofDisableRecordEvent(); void EnableHostEventRecorder(); +void DisableHostEventRecorder(); // Defined for UT std::string PrintHostEvents(); diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 5e61133510d..7ff501ef43d 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,7 +2,7 @@ set(PYBIND_DEPS init pybind python proto_desc memory executor fleet_wrapper box_ feed_fetch_method pass generate_pass pass_builder parallel_executor profiler layer tracer engine scope_pool analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context gloo_wrapper infer_io_utils heter_wrapper generator op_version_registry ps_gpu_wrapper custom_operator - cost_model cuda_graph_with_memory_pool fleet_executor global_utils phi_utils tcp_store) + cost_model cuda_graph_with_memory_pool fleet_executor global_utils phi_utils tcp_store new_profiler) if (WITH_PSCORE) set(PYBIND_DEPS ${PYBIND_DEPS} ps_service) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 0a1cf604d2e..fcfc3e6a379 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -78,6 +78,9 @@ limitations under the License. */ #include "paddle/fluid/platform/monitor.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_python.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" +#include "paddle/fluid/platform/profiler/profiler.h" #include "paddle/fluid/pybind/cuda_streams_py.h" #include "paddle/fluid/pybind/distributed_py.h" #include "paddle/fluid/pybind/eager.h" @@ -2913,6 +2916,88 @@ All parameter, weight, gradient are variables in Paddle. 
}); m.def("size_of_dtype", framework::SizeOfType);
+  py::class_<paddle::platform::ProfilerResult>(m, "_ProfilerResult")
+      .def(py::init<>())
+      .def("get_data", &paddle::platform::ProfilerResult::GetData,
+           py::return_value_policy::automatic_reference)
+      .def("save", &paddle::platform::ProfilerResult::Save)
+      .def("get_extra_info", &paddle::platform::ProfilerResult::GetExtraInfo);
+
+  py::class_<paddle::platform::DevicePythonNode>(m, "DevicePythonNode")
+      .def(py::init<>())
+      .def_readwrite("name", &paddle::platform::DevicePythonNode::name)
+      .def_readwrite("type", &paddle::platform::DevicePythonNode::type)
+      .def_readwrite("start_ns", &paddle::platform::DevicePythonNode::start_ns)
+      .def_readwrite("end_ns", &paddle::platform::DevicePythonNode::end_ns)
+      .def_readwrite("device_id",
+                     &paddle::platform::DevicePythonNode::device_id)
+      .def_readwrite("context_id",
+                     &paddle::platform::DevicePythonNode::context_id)
+      .def_readwrite("stream_id",
+                     &paddle::platform::DevicePythonNode::stream_id);
+
+  py::class_<paddle::platform::HostPythonNode>(m, "HostPythonNode")
+      .def(py::init<>())
+      .def_readwrite("name", &paddle::platform::HostPythonNode::name)
+      .def_readwrite("type", &paddle::platform::HostPythonNode::type)
+      .def_readwrite("start_ns", &paddle::platform::HostPythonNode::start_ns)
+      .def_readwrite("end_ns", &paddle::platform::HostPythonNode::end_ns)
+      .def_readwrite("process_id",
+                     &paddle::platform::HostPythonNode::process_id)
+      .def_readwrite("thread_id", &paddle::platform::HostPythonNode::thread_id)
+      .def_readwrite("children_node",
+                     &paddle::platform::HostPythonNode::children_node_ptrs)
+      .def_readwrite("runtime_node",
+                     &paddle::platform::HostPythonNode::runtime_node_ptrs)
+      .def_readwrite("device_node",
+                     &paddle::platform::HostPythonNode::device_node_ptrs);
+
+  py::class_<paddle::platform::Profiler>(m, "_Profiler")
+      .def("create", &paddle::platform::Profiler::Create,
+           py::return_value_policy::take_ownership)
+      .def("prepare",
+           [](paddle::platform::Profiler *profiler) {
+             platform::EnableHostEventRecorder();  // hook stays on until stop()
+             profiler->Prepare();
+           })
+      .def("start",
+           &paddle::platform::Profiler::Start)
+      .def("stop",
+           [](paddle::platform::Profiler *profiler) {
+             platform::DisableHostEventRecorder();  // undo what prepare() enabled
+             return profiler->Stop();
+           },
+           py::return_value_policy::automatic_reference);
+
+  py::class_<paddle::platform::ProfilerOptions>(m, "ProfilerOptions")
+      .def(py::init<>())
+      .def_readwrite("trace_switch",
+                     &paddle::platform::ProfilerOptions::trace_switch);
+
+  py::class_<platform::RecordEvent>(m, "_RecordEvent")
+      .def(py::init([](std::string name, platform::TracerEventType type) {
+        return std::make_unique<platform::RecordEvent>(
+            name, type, 1, paddle::platform::EventRole::kOrdinary);
+      }))
+      .def("end", [](platform::RecordEvent *event) { event->End(); });
+
+  py::enum_<paddle::platform::TracerEventType>(m, "TracerEventType")
+      .value("Operator", paddle::platform::TracerEventType::Operator)
+      .value("Dataloader", paddle::platform::TracerEventType::Dataloader)
+      .value("ProfileStep", paddle::platform::TracerEventType::ProfileStep)
+      .value("CudaRuntime", paddle::platform::TracerEventType::CudaRuntime)
+      .value("Kernel", paddle::platform::TracerEventType::Kernel)
+      .value("Memcpy", paddle::platform::TracerEventType::Memcpy)
+      .value("Memset", paddle::platform::TracerEventType::Memset)
+      .value("UserDefined", paddle::platform::TracerEventType::UserDefined)
+      .value("OperatorInner", paddle::platform::TracerEventType::OperatorInner)
+      .value("Forward", paddle::platform::TracerEventType::Forward)
+      .value("Backward", paddle::platform::TracerEventType::Backward)
+      .value("Optimization", paddle::platform::TracerEventType::Optimization)
+      .value("Communication", paddle::platform::TracerEventType::Communication)
+      .value("PythonOp", paddle::platform::TracerEventType::PythonOp)
+      .value("PythonUserDefined",
+             paddle::platform::TracerEventType::PythonUserDefined);
+  m.def("load_profiler_result", &paddle::platform::LoadProfilerResult);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("set_cublas_switch", platform::SetAllowTF32Cublas);
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
index 5e023e9248c..617ab630528
100644 --- a/python/paddle/fluid/core.py +++ b/python/paddle/fluid/core.py @@ -283,6 +283,7 @@ if avx_supported(): from .core_avx import _set_cached_executor_build_strategy from .core_avx import _device_synchronize from .core_avx import _get_current_stream + from .core_avx import _Profiler, _ProfilerResult, _RecordEvent from .core_avx import _set_current_stream if sys.platform != 'win32': from .core_avx import _set_process_pids @@ -344,6 +345,7 @@ if load_noavx: from .core_noavx import _device_synchronize from .core_noavx import _get_current_stream from .core_noavx import _set_current_stream + from .core_noavx import _Profiler, _ProfilerResult, _RecordEvent if sys.platform != 'win32': from .core_noavx import _set_process_pids from .core_noavx import _erase_process_pids diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py new file mode 100755 index 00000000000..12fb0fa61b0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -0,0 +1,129 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+
+import paddle
+import paddle.profiler as profiler
+
+
+class TestProfiler(unittest.TestCase):
+    def test_profiler(self):  # smoke test: runs Profiler under several schedulers, asserts nothing
+        def my_trace_back(prof):  # on_trace_ready callback: export both json and pb
+            profiler.export_chrome_tracing('./test_profiler_chrometracing/')(
+                prof)
+            profiler.export_protobuf('./test_profiler_pb/')(prof)
+
+        x_value = np.random.randn(2, 3, 3)
+        x = paddle.to_tensor(
+            x_value, stop_gradient=False, place=paddle.CPUPlace())
+        y = x / 2.0
+        ones_like_y = paddle.ones_like(y)
+        with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU], ) as prof:  # default scheduler
+            y = x / 2.0
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=(1, 2)) as prof:  # tuple form of the scheduler
+            with profiler.RecordEvent(name='test'):
+                y = x / 2.0
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=profiler.make_scheduler(
+                    closed=0, ready=1, record=1, repeat=1),
+                on_trace_ready=my_trace_back) as prof:  # exits while still in READY (no step())
+            y = x / 2.0
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=profiler.make_scheduler(
+                    closed=0, ready=0, record=2, repeat=1),
+                on_trace_ready=my_trace_back) as prof:  # ready=0 triggers the "noisy data" warning
+            for i in range(3):
+                y = x / 2.0
+                prof.step()
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=lambda x: profiler.ProfilerState.RECORD_AND_RETURN,
+                on_trace_ready=my_trace_back) as prof:
+            for i in range(2):
+                y = x / 2.0
+                prof.step()
+
+        def my_sheduler(num_step):  # custom scheduler cycling through all four states
+            if num_step % 5 < 2:
+                return profiler.ProfilerState.RECORD_AND_RETURN
+            elif num_step % 5 < 3:
+                return profiler.ProfilerState.READY
+            elif num_step % 5 < 4:
+                return profiler.ProfilerState.RECORD
+            else:
+                return profiler.ProfilerState.CLOSED
+
+        def my_sheduler1(num_step):  # variant that includes RECORD -> READY and RECORD -> CLOSED steps
+            if num_step % 5 < 2:
+                return profiler.ProfilerState.RECORD
+            elif num_step % 5 < 3:
+                return profiler.ProfilerState.READY
+            elif num_step % 5 < 4:
+                return profiler.ProfilerState.RECORD
+            else:
+                return profiler.ProfilerState.CLOSED
+
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=lambda x: profiler.ProfilerState.RECORD_AND_RETURN,
+                on_trace_ready=my_trace_back) as prof:
+            for i in range(2):
+                y = x / 2.0
+                prof.step()
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=my_sheduler,
+                on_trace_ready=my_trace_back) as prof:
+            for i in range(5):
+                y = x / 2.0
+                prof.step()
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=my_sheduler1) as prof:  # no callback given: Profiler falls back to its default
+            for i in range(5):
+                y = x / 2.0
+                prof.step()
+        prof = None
+        with profiler.Profiler(
+                targets=[profiler.ProfilerTarget.CPU],
+                scheduler=profiler.make_scheduler(
+                    closed=1, ready=1, record=2, repeat=1, skip_first=1),
+                on_trace_ready=my_trace_back) as prof:
+            for i in range(5):
+                y = x / 2.0
+                paddle.grad(outputs=y, inputs=[x], grad_outputs=ones_like_y)
+                prof.step()
+
+        prof.export(path='./test_profiler_pb.pb', format='pb')  # uses the last profiler's result
+        prof.summary()
+        result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/profiler/__init__.py b/python/paddle/profiler/__init__.py
new file mode 100644
index 00000000000..4999e703f2a
--- /dev/null
+++ b/python/paddle/profiler/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from .profiler import ProfilerState, ProfilerTarget +from .profiler import make_scheduler, export_chrome_tracing, export_protobuf +from .profiler import Profiler +from .profiler import TracerEventType +from .utils import RecordEvent, load_profiler_result +from .profiler_statistic import SortedKeys + +__all__ = [ + 'ProfilerState', 'ProfilerTarget', 'TracerEventType', 'make_scheduler', + 'export_chrome_tracing', 'export_protobuf', 'Profiler', 'RecordEvent', + 'load_profiler_result', 'SortedKeys' +] diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py new file mode 100644 index 00000000000..dc637bf9830 --- /dev/null +++ b/python/paddle/profiler/profiler.py @@ -0,0 +1,469 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import socket +import datetime +from enum import Enum +from typing import Any, Callable, Iterable, Optional, Union +from warnings import warn + +import paddle +from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions, + TracerEventType) + +from .utils import RecordEvent, wrap_optimizers +from .profiler_statistic import SortedKeys + + +class ProfilerState(Enum): + r""" + Profiler state that can be specified to control profiler action. + + CLOSED: The profilers are closed. + READY: The profilers are open, but the data will not be recorded. + This state is used for reducing overhead influence when profilers start. 
+    RECORD: The profilers are open, and the data will be recorded.
+    RECORD_AND_RETURN: The profilers are open, and at the last batch of current profiler period,
+      the collected data will be returned.
+    """
+    CLOSED = 0
+    READY = 1
+    RECORD = 2
+    RECORD_AND_RETURN = 3  # the last step of RECORD
+
+
+class ProfilerTarget(Enum):
+    r"""
+    Target device for profiling.
+    """
+    CPU = 0
+    GPU = 1
+
+
+def make_scheduler(*,
+                   closed: int,
+                   ready: int,
+                   record: int,
+                   repeat: int=0,
+                   skip_first: int=0) -> Callable:
+    r"""
+    Return a scheduler function, which schedules the state according to the setting.
+    The state transform conforms to:
+
+    (CLOSED)  (CLOSED)    (CLOSED)  (READY)    (RECORD,last RETURN)      (CLOSED)
+    START -> skip_first -> closed -> ready   ->    record       ->       END
+                             |                       |
+                             |                       | (if has_repeated < repeat)
+                              - - - - - - - - - - - -
+    Note that repeat == 0 (the default) means the cycle will continue until the profiler exits.
+
+    Parameters:
+        closed(int): The number of steps in state ProfilerState.CLOSED.
+        ready(int): The number of steps in state ProfilerState.READY.
+        record(int): The number of steps in state ProfilerState.RECORD.
+        repeat(int): The number of cycles to repeat above state transform.
+        skip_first(int): The number of first steps to drop, not participate in the state transform.
+
+    Returns:
+        A scheduler function, conforms to above state transform setting.
+
+    Examples:
+        1. profiling range [2, 5]
+        batch 0: closed, batch 1: ready, batch [2, 5] record
+            .. code-block:: python
+                make_scheduler(closed=1, ready=1, record=4, repeat=1)
+        2. profiling range [3,6], [9,12], [15,18]...
+        batch 0: skipped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat
+            .. code-block:: python
+                make_scheduler(closed=1, ready=1, record=4, skip_first=1)
+    """
+
+    def getScheduleState(step: int) -> ProfilerState:
+        assert step >= 0
+        if step < skip_first:  # within skip_first, just skip
+            return ProfilerState.CLOSED
+        step = step - skip_first
+        period_steps = closed + ready + record
+        has_repeated = step // period_steps
+        if repeat > 0 and has_repeated >= repeat:  # the period has repeated repeat times, return CLOSED state
+            return ProfilerState.CLOSED
+        mod_step = step % period_steps
+        if mod_step < closed:
+            return ProfilerState.CLOSED
+        elif mod_step >= closed and mod_step < closed + ready:
+            return ProfilerState.READY
+        else:
+            if mod_step < period_steps - 1:
+                return ProfilerState.RECORD
+            else:  # last step of the period: record and hand the data back
+                return ProfilerState.RECORD_AND_RETURN
+    assert closed >= 0 and ready >= 0 and record > 0 and \
+        repeat >= 0 and skip_first >= 0, "Invalid profiler scheduler arguments"
+    if ready == 0:
+        warn("Profiler will record data after enabling profiler immediately, \
+ some data collected at the beginning of profiling may be 'noisy' because of overhead."
+             )
+    return getScheduleState
+
+
+def _default_state_scheduler(step: int):
+    r"""
+    A default state scheduler, keep recording from the beginning of the profiler until ending.
+    """
+    return ProfilerState.RECORD
+
+
+def export_chrome_tracing(dir_name: str,
+                          worker_name: Optional[str]=None) -> Callable:
+    r"""
+    Return a callable, used for outputting tracing data to chrome tracing format file.
+    The output file will be saved in directory 'dir_name', and file name will be set as worker_name.
+    if worker_name is not set, the default name is host_[hostname]pid_[pid].
+
+    Parameters:
+        dir_name(str): Directory to save profiling data.
+        worker_name(Optional[str]): Prefix of the file name saved, default is host_[hostname]pid_[pid].
+
+    Examples:
+        .. 
code-block:: python
+            import paddle.profiler as profiler
+            with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU,
+                                            profiler.ProfilerTarget.GPU],
+                                scheduler = (3, 10),
+                                on_trace_ready = profiler.export_chrome_tracing('./log')
+                                ) as p:
+                for iter in range(N):
+                    train()
+                    p.step()
+    """
+    if not os.path.exists(dir_name):
+        try:
+            os.makedirs(dir_name, exist_ok=True)
+        except Exception:
+            raise RuntimeError(
+                "Can not create directory '{}' for saving profiling results.".
+                format(dir_name))
+
+    def handle_fn(prof):
+        nonlocal worker_name  # the default computed below is cached on the closure for later calls
+        if not worker_name:
+            worker_name = "host_{}pid_{}".format(socket.gethostname(),
+                                                 str(os.getpid()))
+        now = datetime.datetime.now()
+        filename = '{}_time_{}.paddle_trace.json'.format(
+            worker_name, now.strftime('%Y_%m_%d_%H_%M_%S_%f'))
+        prof.export(os.path.join(dir_name, filename), "json")
+
+    return handle_fn
+
+
+def export_protobuf(dir_name: str, worker_name: Optional[str]=None) -> Callable:
+    r"""
+    Return a callable, used for outputting tracing data to protobuf file.
+    The output file will be saved in directory 'dir_name', and file name will be set as worker_name.
+    if worker_name is not set, the default name is host_[hostname]pid_[pid].
+
+    Parameters:
+        dir_name(str): Directory to save profiling data.
+        worker_name(Optional[str]): Prefix of the file name saved, default is host_[hostname]pid_[pid].
+
+    Examples:
+        .. code-block:: python
+            import paddle.profiler as profiler
+            with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU,
+                                            profiler.ProfilerTarget.GPU],
+                                scheduler = (3, 10),
+                                on_trace_ready = profiler.export_protobuf('./log')
+                                ) as p:
+                for iter in range(N):
+                    train()
+                    p.step()
+    """
+    if not os.path.exists(dir_name):
+        try:
+            os.makedirs(dir_name, exist_ok=True)
+        except Exception:
+            raise RuntimeError(
+                "Can not create directory '{}' for saving profiling results.".
+                format(dir_name))
+
+    def handle_fn(prof):
+        nonlocal worker_name  # the default computed below is cached on the closure for later calls
+        if not worker_name:
+            worker_name = "host_{}pid_{}".format(socket.gethostname(),
+                                                 str(os.getpid()))
+        now = datetime.datetime.now()
+        filename = '{}_time_{}.paddle_trace.pb'.format(
+            worker_name, now.strftime('%Y_%m_%d_%H_%M_%S_%f'))
+        prof.export(os.path.join(dir_name, filename), "pb")
+
+    return handle_fn
+
+
+def _get_supported_targets() -> Iterable[ProfilerTarget]:
+    r"""
+    Get the current supported profiler target in the system.
+    """
+    if paddle.device.is_compiled_with_cuda():
+        return [ProfilerTarget.CPU, ProfilerTarget.GPU]
+    return [ProfilerTarget.CPU]
+
+
+class Profiler:
+    r"""
+    Profiler context manager, user interface to manage profile process.
+
+    Parameters:
+        targets (iterable): list of tracing targets, currently supported values:
+            ``paddle.profiler.ProfilerTarget.CPU``,
+            ``paddle.profiler.ProfilerTarget.GPU``.
+        scheduler (callable or tuple): If it is a callable object, it takes a step number as parameter and return the corresponding ``ProfilerState``.
+            If not provided, the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
+            which means profiling range [start_batch, end_batch).
+        on_trace_ready (callable): callable object, takes the Profiler object as parameter, which provides a way for users to do post-processing.
+            This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``.
+
+    Examples:
+        1. profiling range [2, 5)
+            .. code-block:: python
+                import paddle.profiler as profiler
+                with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU,
+                                                profiler.ProfilerTarget.GPU],
+                                    scheduler = (2, 5),
+                                    on_trace_ready = profiler.export_chrome_tracing('./log')
+                                    ) as p:
+                    for iter in range(N):
+                        train()
+                        p.step()
+        2. profiling range [2, 4], [7, 9], [12, 14]
+            .. 
code-block:: python
+                import paddle.profiler as profiler
+                with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU,
+                                                profiler.ProfilerTarget.GPU],
+                                    scheduler = profiler.make_scheduler(closed=1, ready=1, record=3, repeat=3),
+                                    on_trace_ready = profiler.export_chrome_tracing('./log')
+                                    ) as p:
+                    for iter in range(N):
+                        train()
+                        p.step()
+        3. Use profiler without context manager, and use default parameters
+            .. code-block:: python
+                import paddle.profiler as profiler
+                p = profiler.Profiler()
+                p.start()
+                for iter in range(N):
+                    train()
+                    p.step()
+                p.stop()
+                p.summary()
+    """
+
+    def __init__(
+            self,
+            *,
+            targets: Optional[Iterable[ProfilerTarget]]=None,
+            scheduler: Union[Callable[[int], ProfilerState], tuple, None]=None,
+            on_trace_ready: Optional[Callable[..., Any]]=None):
+        supported_targets = _get_supported_targets()
+        if targets:
+            self.targets = set(targets)
+            for target in tuple(self.targets):  # snapshot of the set: duplicates / one-shot iterables are safe
+                if target not in supported_targets:
+                    self.targets.discard(target)
+                    warn("Profiling {} is not supported in current context.".
+                         format(target))
+        else:
+            self.targets = supported_targets
+        profileoption = ProfilerOptions()
+        if ProfilerTarget.CPU in self.targets:
+            profileoption.trace_switch |= 1  # bit 0 selects CPU tracing
+        if ProfilerTarget.GPU in self.targets:
+            profileoption.trace_switch |= (1 << 1)  # bit 1 selects GPU tracing
+        wrap_optimizers()
+        self.profiler = _Profiler.create(profileoption)
+        if callable(scheduler):
+            self.scheduler = scheduler
+        elif isinstance(scheduler, (tuple, list)):  # (start_batch, end_batch) -> record [start, end)
+            assert len(scheduler) == 2 and scheduler[1] > scheduler[0]
+            start_batch, end_batch = scheduler
+            start_batch = max(start_batch, 0)
+            if start_batch >= 1:  # room for one READY (warm-up) step before recording
+                self.scheduler = make_scheduler(
+                    closed=max(start_batch - 1, 0),
+                    ready=1,
+                    record=(end_batch - start_batch),
+                    repeat=1)
+            else:  # recording starts at step 0, so no warm-up step is possible
+                self.scheduler = make_scheduler(
+                    closed=0,
+                    ready=0,
+                    record=(end_batch - start_batch),
+                    repeat=1)
+        else:
+            self.scheduler = _default_state_scheduler
+
+        if on_trace_ready is None:  # default: dump a chrome trace under ./profiler_log/
+            self.on_trace_ready = export_chrome_tracing('./profiler_log/')
+        else:
+            self.on_trace_ready = on_trace_ready
+        self.step_num = 0
+        self.previous_state = ProfilerState.CLOSED
+        self.current_state = self.scheduler(self.step_num)
+        self.record_event = None
+        self.profiler_result = None
+
+    def __enter__(self):
+        self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.stop()
+
+    def start(self):
+        r'''
+        Start profiler and enter the first profiler step(0).
+        State transformed from CLOSED to self.current_state and trigger corresponding action.
+        '''
+        # CLOSED -> self.current_state
+        if self.current_state == ProfilerState.READY:
+            self.profiler.prepare()
+        elif self.current_state == ProfilerState.RECORD:
+            self.profiler.prepare()
+            self.profiler.start()
+        elif self.current_state == ProfilerState.RECORD_AND_RETURN:
+            self.profiler.prepare()
+            self.profiler.start()
+        self.record_event = RecordEvent(  # opens the "ProfileStep#0" range; closed by step()/stop()
+            name="ProfileStep#{}".format(self.step_num),
+            event_type=TracerEventType.ProfileStep)
+        self.record_event.begin()
+
+    def stop(self):
+        r'''
+        Stop profiler and State transformed from self.current_state to CLOSED.
+        Trigger corresponding action and post-process profiler result using self.on_trace_ready if result exists.
+        '''
+        # self.current_state -> CLOSED
+        # In this situation, RECORD state is regarded as RECORD_AND_RETURN
+        if self.record_event:
+            self.record_event.end()
+            self.record_event = None
+        if self.current_state == ProfilerState.READY:
+            warn(
+                "Inproper Profiler state transform: READY->CLOSED, profiler will start and stop without saving data"
+            )
+            self.profiler.start()
+            self.profiler.stop()
+        if self.current_state == ProfilerState.RECORD or self.current_state == ProfilerState.RECORD_AND_RETURN:
+            self.profiler_result = self.profiler.stop()
+            if self.on_trace_ready:
+                self.on_trace_ready(self)
+
+    def step(self):
+        r"""
+        Signals the profiler that the next profiling step has started.
+        Get the new ProfilerState and trigger corresponding action.
+        """
+        if self.record_event:  # close the previous step's ProfileStep range first
+            self.record_event.end()
+            self.record_event = None
+        self.previous_state = self.current_state
+        self.step_num += 1
+        self.current_state = self.scheduler(self.step_num)
+        self._trigger_action()
+        self.record_event = RecordEvent(
+            name="ProfileStep#{}".format(self.step_num),
+            event_type=TracerEventType.ProfileStep)
+        self.record_event.begin()
+
+    def _trigger_action(self):  # drives self.profiler for the previous_state -> current_state transition
+        if self.previous_state == ProfilerState.CLOSED:
+            if self.current_state == ProfilerState.READY:  # CLOSED -> READY
+                self.profiler.prepare()
+            if self.current_state == ProfilerState.RECORD:  # CLOSED -> RECORD
+                self.profiler.prepare()
+                self.profiler.start()
+            if self.current_state == ProfilerState.RECORD_AND_RETURN:  # CLOSED -> RECORD_AND_RETURN
+                self.profiler.prepare()
+                self.profiler.start()
+
+        elif self.previous_state == ProfilerState.READY:
+            if self.current_state == ProfilerState.CLOSED:  # READY -> CLOSED
+                warn(
+                    "Improper schedule: READY->CLOSED, profiler will start and stop without saving data"
+                )
+                self.profiler.start()
+                self.profiler.stop()
+            if self.current_state == ProfilerState.RECORD:  # READY -> RECORD
+                self.profiler.start()
+            if self.current_state == ProfilerState.RECORD_AND_RETURN:  # READY -> RECORD_AND_RETURN
+                self.profiler.start()
+
+        elif self.previous_state == ProfilerState.RECORD:
+            if self.current_state == ProfilerState.CLOSED:  # RECORD -> CLOSED
+                warn(
+                    "Improper schedule: RECORD->CLOSED, profiler will not saving data"
+                )
+                self.profiler.stop()
+
+            if self.current_state == ProfilerState.READY:  # RECORD -> READY
+                warn(
+                    "Improper schedule: RECORD->READY, profiler will stop and re-prepare"
+                )
+                self.profiler.stop()
+                self.profiler.prepare()
+            if self.current_state == ProfilerState.RECORD_AND_RETURN:  # RECORD -> RECORD_AND_RETURN
+                pass
+
+        else:
+            assert self.previous_state == ProfilerState.RECORD_AND_RETURN
+            if self.current_state == ProfilerState.CLOSED:  # RECORD_AND_RETURN -> CLOSED
+                self.profiler_result = self.profiler.stop()
+            if self.current_state == ProfilerState.READY:  # RECORD_AND_RETURN -> READY
+                self.profiler_result = self.profiler.stop()
+                self.profiler.prepare()
+            if self.current_state == ProfilerState.RECORD:  # RECORD_AND_RETURN -> RECORD
+                self.profiler_result = self.profiler.stop()
+                self.profiler.prepare()
+                self.profiler.start()
+            if self.current_state == ProfilerState.RECORD_AND_RETURN:  # RECORD_AND_RETURN -> RECORD_AND_RETURN
+                self.profiler_result = self.profiler.stop()
+                self.profiler.prepare()
+                self.profiler.start()
+            if self.on_trace_ready:  # a result was just collected above; hand it to the callback
+                self.on_trace_ready(self)
+
+    def export(self, path="", format="json"):
+        r"""
+        Save the collected profiler result to ``path`` in ``format`` (the exporters in this module pass "json" or "pb"); no-op if no result exists yet.
+        """
+        if self.profiler_result:
+            self.profiler_result.save(path, format)
+
+    def summary(self,
+                sorted_by=SortedKeys.CPUTotal,
+                op_detail=True,
+                thread_sep=False,
+                time_unit='ms'):
+        r"""
+        Print the Summary table (placeholder: the body is currently a no-op).
+
+        Parameters:
+            sorted_by: how to rank the op table items.
+            op_detail: expand each operator detail information.
+            thread_sep: print op table each thread.
+            time_unit: can be chosen from ['s', 'ms', 'us', 'ns']
+        """
+        pass
diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py
new file mode 100644
index 00000000000..29d586268a0
--- /dev/null
+++ b/python/paddle/profiler/profiler_statistic.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import collections
+from enum import Enum
+
+from paddle.fluid.core import TracerEventType
+
+
+class SortedKeys(Enum):
+    r"""
+    Sorted keys for printing summary table.
+    """
+    CPUTotal = 0
+    CPUAvg = 1
+    CPUMax = 2
+    CPUMin = 3
+    GPUTotal = 4
+    GPUAvg = 5
+    GPUMax = 6
+    GPUMin = 7
diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py
new file mode 100644
index 00000000000..642001dfbfc
--- /dev/null
+++ b/python/paddle/profiler/utils.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.fluid.core import (_RecordEvent, TracerEventType,
+                               load_profiler_result)
+from typing import Any
+from warnings import warn
+import functools
+from contextlib import ContextDecorator
+
+_AllowedEventTypeList = [
+    TracerEventType.Dataloader, TracerEventType.ProfileStep,
+    TracerEventType.UserDefined, TracerEventType.Forward,
+    TracerEventType.Backward, TracerEventType.Optimization,
+    TracerEventType.PythonOp, TracerEventType.PythonUserDefined
+]
+
+
+class RecordEvent(ContextDecorator):
+    r"""
+    Interface for recording a time range.
+
+    Parameters:
+        name(str): Name of the record event
+        event_type(TracerEventType): Type of the record event, can be used for statistics.
+
+    Examples:
+        .. code-block:: python
+            import paddle.profiler as profiler
+            with profiler.RecordEvent(name='op1', event_type=profiler.TracerEventType.UserDefined):
+                op1()
+    """
+
+    def __init__(self,
+                 name: str,
+                 event_type: TracerEventType=TracerEventType.UserDefined):
+        self.name = name
+        self.event_type = event_type
+        self.event = None  # native _RecordEvent handle; set by begin()
+
+    def __enter__(self):
+        self.begin()
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any):
+        self.end()
+
+    def begin(self):  # start the range; disallowed event types only warn and record nothing
+        if self.event_type not in _AllowedEventTypeList:
+            warn("Only TracerEvent Type in [{}] can be recorded.".format(
+                ", ".join(str(t) for t in _AllowedEventTypeList)))
+            self.event = None
+        else:
+            if self.event_type == TracerEventType.UserDefined:
+                self.event_type = TracerEventType.PythonUserDefined  # Python-side user events get their own tag
+            self.event = _RecordEvent(self.name, self.event_type)
+
+    def end(self):  # close the range; no-op if begin() recorded nothing
+        if self.event:
+            self.event.end()
+
+
+def wrap_optimizers():  # patch optimizer classes so step() is recorded; safe to call repeatedly
+    def optimizer_warpper(func):
+        @functools.wraps(func)
+        def warpper(*args, **kwargs):
+            with RecordEvent(
+                    'Optimization Step',
+                    event_type=TracerEventType.Optimization):
+                return func(*args, **kwargs)
+        warpper._profiler_wrapped = True  # marker checked below so a step is never wrapped twice
+        return warpper
+
+    import paddle.optimizer as optimizer
+    for classname in optimizer.__all__:
+        if classname != 'Optimizer':
+            step = getattr(getattr(optimizer, classname), 'step', None)
+            if step is not None and not hasattr(step, '_profiler_wrapped'):
+                getattr(optimizer, classname).step = optimizer_warpper(step)
diff --git a/python/setup.py.in b/python/setup.py.in
index 0bc32cfbc00..118f617361f 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -372,6 +372,7 @@ packages=['paddle',
           'paddle.device',
           'paddle.device.cuda',
           'paddle.version',
+          'paddle.profiler'
           ]
 
 with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
-- 
GitLab