# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import socket
import datetime
from enum import Enum
from typing import Any, Callable, Iterable, Optional, Union
from warnings import warn

import paddle
from paddle.fluid.core import (_Profiler, _ProfilerResult, ProfilerOptions,
                               TracerEventType)

from .utils import RecordEvent, wrap_optimizers
from .profiler_statistic import StatisticData, _build_table, SortedKeys


class ProfilerState(Enum):
    r"""
    ProfilerState is used to present the state of :ref:`Profiler <api_paddle_profiler_Profiler>` .

    The meaning of each ProfilerState is as follows

    - **ProfilerState.CLOSED** : The profiler is closed, and no profiling data will be recorded.

    - **ProfilerState.READY** : The profiler is open, but the data will not be recorded. This state is used for reducing overhead influence when profiler starts.

    - **ProfilerState.RECORD** : The profiler is open, and the data will be recorded.

    - **ProfilerState.RECORD_AND_RETURN** : The profiler is open, and this state stands for the last batch of "RECORD" state in current profiling period. The collected data will be returned in this state.
    """
    CLOSED = 0
    READY = 1
    RECORD = 2
    RECORD_AND_RETURN = 3  # the last step of RECORD
C
chenjian 已提交
48 49 50 51


class ProfilerTarget(Enum):
    r"""
    ProfilerTarget selects which device the :ref:`Profiler <api_paddle_profiler_Profiler>` collects events from. Only CPU and GPU are supported currently.

    The meaning of each ProfilerTarget is as follows

    - **ProfilerTarget.CPU** : Profile events on CPU.

    - **ProfilerTarget.GPU** : Profile events on GPU.
    """
    CPU = 0
    GPU = 1


def make_scheduler(*,
                   closed: int,
                   ready: int,
                   record: int,
                   repeat: int=0,
                   skip_first: int=0) -> Callable:
    r"""
    Return a scheduler function, which schedules the :ref:`state <api_paddle_profiler_ProfilerState>` according to the setting.
    The state transform conforms to:

    .. code-block:: text

        (CLOSED)  (CLOSED)    (CLOSED)  (READY)    (RECORD,last RETURN)      (CLOSED)
        START -> skip_first -> closed -> ready    ->    record       ->      END
                                |                        |
                                |                        | (if has_repeated < repeat)
                                - - - - - - - - - - - -
        Note that repeat <= 0 means the cycle will continue until the profiler exits.

    Args:
        closed(int): The number of steps in state ProfilerState.CLOSED.
        ready(int):  The number of steps in state ProfilerState.READY.
        record(int): The number of steps in state ProfilerState.RECORD, and the state in last step will be set as ProfilerState.RECORD_AND_RETURN.
        repeat(int, optional): The number of cycles to repeat above state transform. Default value is 0, which means it will repeat this cycle until profiler exits.
        skip_first(int, optional): The number of first steps to drop, not participate in the state transform, and at ProfilerState.CLOSED state. Default value is 0.

    Returns:
        A scheduler function, conforms to above state transform setting. The function takes one parameter step_num, and returns corresponding ProfilerState.

    Raises:
        AssertionError: If ``closed``, ``ready``, ``repeat`` or ``skip_first`` is negative, or ``record`` is not positive.

    Examples:
        1. profiling range [2, 5]

        Assume batch 0: closed, batch 1: ready, batch [2, 5] record

            .. code-block:: python
                :name: code-example1

                import paddle.profiler as profiler
                profiler.make_scheduler(closed=1, ready=1, record=4, repeat=1)


        2. profiling range [3,6], [9,12], [15,18]...

        Assume batch 0: skipped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat

            .. code-block:: python
                :name: code-example2

                import paddle.profiler as profiler
                profiler.make_scheduler(closed=1, ready=1, record=4, skip_first=1)
    """
    # Reject bad arguments before building the closure, so an invalid
    # configuration never yields a scheduler object.
    assert closed >= 0 and ready >= 0 and record > 0 and \
             repeat >= 0 and skip_first >= 0, "Invalid profiler scheduler arguments"
    if ready == 0:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which used to embed the source indentation in the message.
        warn("Profiler will record data after enabling profiler immediately, "
             "some data collected at the beginning of profiling may be 'noisy' because of overhead."
             )

    # Length of one closed -> ready -> record cycle; constant for the
    # lifetime of the scheduler, so compute it once.
    period_steps = closed + ready + record

    def getScheduleState(step: int) -> ProfilerState:
        assert step >= 0
        if step < skip_first:  # within skip_first, just skip
            return ProfilerState.CLOSED
        has_repeated, mod_step = divmod(step - skip_first, period_steps)
        if repeat > 0 and has_repeated >= repeat:
            # the period has repeated `repeat` times, stay CLOSED from now on
            return ProfilerState.CLOSED
        if mod_step < closed:
            return ProfilerState.CLOSED
        if mod_step < closed + ready:
            return ProfilerState.READY
        if mod_step < period_steps - 1:
            return ProfilerState.RECORD
        # last step of the cycle: record and hand the data back
        return ProfilerState.RECORD_AND_RETURN

    return getScheduleState


def _default_state_scheduler(step: int):
    r"""
    Default state scheduler: report ``ProfilerState.RECORD`` for every step, so
    recording runs from the beginning of profiling until the profiler ends.

    The ``step`` argument is accepted to match the scheduler call signature and is not used.
    """
    always_recording = ProfilerState.RECORD
    return always_recording


def export_chrome_tracing(dir_name: str,
                          worker_name: Optional[str]=None) -> Callable:
    r"""
    Return a callable, used for outputting tracing data to chrome tracing format file.
    The output file will be saved in directory ``dir_name``, and the file name is
    ``[worker_name]_time_[timestamp].paddle_trace.json``.
    If worker_name is not set, the default name is ``host_[hostname]pid_[pid]``.

    The directory ``dir_name`` is created when this function is called (not when the
    returned callable runs) if it does not exist yet.

    Args:
        dir_name(str): Directory to save profiling data.
        worker_name(str, optional): Prefix of the file name saved, default is ``host_[hostname]pid_[pid]``.

    Returns:
        A callable, which takes a Profiler object as parameter and calls its export method to save data to chrome tracing format file.

    Raises:
        RuntimeError: If ``dir_name`` does not exist and cannot be created. The original error is attached as the cause.

    Examples:
        The return value can be used as parameter ``on_trace_ready`` in :ref:`Profiler <api_paddle_profiler_Profiler>` .

        .. code-block:: python
            :name: code-example1

            # required: gpu
            import paddle.profiler as profiler
            with profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (3, 10),
                    on_trace_ready=profiler.export_chrome_tracing('./log')) as p:
                for iter in range(10):
                    #train()
                    p.step()
    """
    if not os.path.exists(dir_name):
        try:
            os.makedirs(dir_name, exist_ok=True)
        except Exception as exc:
            # Keep the RuntimeError callers may already catch, but chain the
            # underlying failure so the real reason is visible in tracebacks.
            raise RuntimeError(
                "Can not create directory '{}' for saving profiling results.".
                format(dir_name)) from exc

    def handle_fn(prof):
        # The default worker name is resolved on first use and then kept for
        # later calls of this handler.
        nonlocal worker_name
        if not worker_name:
            worker_name = "host_{}pid_{}".format(socket.gethostname(),
                                                 str(os.getpid()))
        now = datetime.datetime.now()
        # Microsecond-resolution timestamp in the file name.
        filename = '{}_time_{}.paddle_trace.json'.format(
            worker_name, now.strftime('%Y_%m_%d_%H_%M_%S_%f'))
        prof.export(os.path.join(dir_name, filename), "json")

    return handle_fn


def export_protobuf(dir_name: str, worker_name: Optional[str]=None) -> Callable:
    r"""
    Return a callable, used for outputting tracing data to protobuf file.
    The output file will be saved in directory ``dir_name``, and the file name is
    ``[worker_name]_time_[timestamp].paddle_trace.pb``.
    If worker_name is not set, the default name is ``host_[hostname]pid_[pid]``.

    The directory ``dir_name`` is created when this function is called (not when the
    returned callable runs) if it does not exist yet.

    Args:
        dir_name(str): Directory to save profiling data.
        worker_name(str, optional): Prefix of the file name saved, default is ``host_[hostname]pid_[pid]``.

    Returns:
        A callable, which takes a Profiler object as parameter and calls its export method to save data to protobuf file.

    Raises:
        RuntimeError: If ``dir_name`` does not exist and cannot be created. The original error is attached as the cause.

    Examples:
        The return value can be used as parameter ``on_trace_ready`` in :ref:`Profiler <api_paddle_profiler_Profiler>` .

        .. code-block:: python
            :name: code-example1

            # required: gpu
            import paddle.profiler as profiler
            with profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (3, 10),
                    on_trace_ready = profiler.export_protobuf('./log')) as p:
                for iter in range(10):
                    #train()
                    p.step()
    """
    if not os.path.exists(dir_name):
        try:
            os.makedirs(dir_name, exist_ok=True)
        except Exception as exc:
            # Keep the RuntimeError callers may already catch, but chain the
            # underlying failure so the real reason is visible in tracebacks.
            raise RuntimeError(
                "Can not create directory '{}' for saving profiling results.".
                format(dir_name)) from exc

    def handle_fn(prof):
        # The default worker name is resolved on first use and then kept for
        # later calls of this handler.
        nonlocal worker_name
        if not worker_name:
            worker_name = "host_{}pid_{}".format(socket.gethostname(),
                                                 str(os.getpid()))
        now = datetime.datetime.now()
        # Microsecond-resolution timestamp in the file name.
        filename = '{}_time_{}.paddle_trace.pb'.format(
            worker_name, now.strftime('%Y_%m_%d_%H_%M_%S_%f'))
        prof.export(os.path.join(dir_name, filename), "pb")

    return handle_fn


def _get_supported_targets() -> Iterable[ProfilerTarget]:
    r"""
    Get the profiler targets supported in the current system.

    Returns:
        A list that always contains ``ProfilerTarget.CPU`` and additionally
        ``ProfilerTarget.GPU`` when ``_Profiler.is_cupti_supported()`` is true.
    """
    if _Profiler.is_cupti_supported():
        return [ProfilerTarget.CPU, ProfilerTarget.GPU]
    return [ProfilerTarget.CPU]


class Profiler:
    r"""
    Profiler context manager, user interface to manage profiling process to start, stop, export profiling data and print summary table.

    Args:
        targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU <api_paddle_profiler_ProfilerTarget>` and :ref:`ProfilerTarget.GPU <api_paddle_profiler_ProfilerTarget>` .
            Targets that are not supported in the current context are dropped with a warning.
        scheduler (Callable|tuple, optional): If it is a callable object, it takes a step number as parameter and return the corresponding :ref:`ProfilerState <api_paddle_profiler_ProfilerState>`. This callable object can be generated by :ref:`make_scheduler <api_paddle_profiler_make_scheduler>` function.
            If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
            which means profiling range [start_batch, end_batch).
        on_trace_ready (Callable, optional): Callable object, serves as callback function, and takes the Profiler object as parameter, which provides a way for users to do post-processing.
            This callable object will be called when ``scheduler`` returns ``ProfilerState.RECORD_AND_RETURN``. The default value is :ref:`export_chrome_tracing <api_paddle_profiler_export_chrome_tracing>` (./profiler_log/).

    Examples:
        1. profiling range [2, 5).

            .. code-block:: python
                :name: code-example1

                # required: gpu
                import paddle.profiler as profiler
                with profiler.Profiler(
                        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                        scheduler = (2, 5),
                        on_trace_ready = profiler.export_chrome_tracing('./log')) as p:
                    for iter in range(10):
                        #train()
                        p.step()

        2. profiling range [2,4], [7, 9], [12,14]

            .. code-block:: python
                :name: code-example2

                # required: gpu
                import paddle.profiler as profiler
                with profiler.Profiler(
                        targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                        scheduler = profiler.make_scheduler(closed=1, ready=1, record=3, repeat=3),
                        on_trace_ready = profiler.export_chrome_tracing('./log')) as p:
                    for iter in range(10):
                        #train()
                        p.step()

        3. Use profiler without context manager, and use default parameters

            .. code-block:: python
                :name: code-example3

                # required: gpu
                import paddle.profiler as profiler
                p = profiler.Profiler()
                p.start()
                for iter in range(10):
                    #train()
                    p.step()
                p.stop()
                p.summary()

    """

    def __init__(
            self,
            *,
            targets: Optional[Iterable[ProfilerTarget]]=None,
            scheduler: Union[Callable[[int], ProfilerState], tuple, None]=None,
            on_trace_ready: Optional[Callable[..., Any]]=None):
        supported_targets = _get_supported_targets()
        if targets:
            # Walk the requested targets exactly once: this also works for
            # one-shot iterables, and a repeated unsupported target produces
            # a single warning instead of a KeyError from a second removal.
            self.targets = set()
            already_warned = set()
            for target in targets:
                if target in supported_targets:
                    self.targets.add(target)
                elif target not in already_warned:
                    already_warned.add(target)
                    warn("Profiling {} is not supported in current context.".
                         format(target))
        else:
            self.targets = supported_targets

        # trace_switch is a bit mask: bit 0 is set for CPU, bit 1 for GPU.
        profileoption = ProfilerOptions()
        if ProfilerTarget.CPU in self.targets:
            profileoption.trace_switch |= 1
        if ProfilerTarget.GPU in self.targets:
            profileoption.trace_switch |= (1 << 1)
        wrap_optimizers()
        self.profiler = _Profiler.create(profileoption)

        if callable(scheduler):
            self.scheduler = scheduler
        elif isinstance(scheduler, (tuple, list)):
            assert len(scheduler) == 2 and scheduler[1] > scheduler[0]
            start_batch, end_batch = scheduler
            start_batch = max(start_batch, 0)
            if start_batch >= 1:
                # Use the batch right before start_batch as the READY step.
                self.scheduler = make_scheduler(
                    closed=max(start_batch - 1, 0),
                    ready=1,
                    record=(end_batch - start_batch),
                    repeat=1)
            else:
                # Recording starts at batch 0, so there is no room for a
                # READY step.
                self.scheduler = make_scheduler(
                    closed=0,
                    ready=0,
                    record=(end_batch - start_batch),
                    repeat=1)
        else:
            self.scheduler = _default_state_scheduler

        if on_trace_ready is None:
            self.on_trace_ready = export_chrome_tracing('./profiler_log/')
        else:
            self.on_trace_ready = on_trace_ready
        self.step_num = 0
        self.previous_state = ProfilerState.CLOSED
        self.current_state = self.scheduler(self.step_num)
        self.record_event = None
        self.profiler_result = None

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    def start(self):
        r'''
        Start profiler and enter the first profiler step(0).
        State transformed from CLOSED to self.current_state and trigger corresponding action.

        Examples:
            .. code-block:: python
                :name: code-example4

                # required: gpu
                import paddle.profiler as profiler
                prof = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (1, 9),
                    on_trace_ready = profiler.export_chrome_tracing('./log'))
                prof.start()
                for iter in range(10):
                    #train()
                    prof.step()
                prof.stop()
        '''
        # CLOSED -> self.current_state
        if self.current_state == ProfilerState.READY:
            self.profiler.prepare()
        elif self.current_state in (ProfilerState.RECORD,
                                    ProfilerState.RECORD_AND_RETURN):
            self.profiler.prepare()
            self.profiler.start()
        # Open the step event for step 0; it is closed by step() or stop().
        self.record_event = RecordEvent(
            name="ProfileStep#{}".format(self.step_num),
            event_type=TracerEventType.ProfileStep)
        self.record_event.begin()

    def stop(self):
        r'''
        Stop profiler and State transformed from self.current_state to CLOSED.
        Trigger corresponding action and post-process profiler result using self.on_trace_ready if result exists.

        Examples:
            .. code-block:: python
                :name: code-example5

                # required: gpu
                import paddle.profiler as profiler
                prof = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (1, 7),
                    on_trace_ready = profiler.export_chrome_tracing('./log'))
                prof.start()
                for iter in range(10):
                    #train()
                    prof.step()
                prof.stop()
        '''
        # self.current_state -> CLOSED
        # In this situation, RECORD state is regarded as RECORD_AND_RETURN
        if self.record_event:
            self.record_event.end()
            self.record_event = None
        if self.current_state == ProfilerState.READY:
            warn(
                "Inproper Profiler state transform: READY->CLOSED, profiler will start and stop without saving data"
            )
            self.profiler.start()
            self.profiler.stop()
        if self.current_state in (ProfilerState.RECORD,
                                  ProfilerState.RECORD_AND_RETURN):
            self.profiler_result = self.profiler.stop()
            if self.on_trace_ready:
                self.on_trace_ready(self)

    def step(self):
        r"""
        Signals the profiler that the next profiling step has started.
        Get the new ProfilerState and trigger corresponding action.

        Examples:
            .. code-block:: python
                :name: code-example6

                # required: gpu
                import paddle.profiler as profiler
                prof = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (3, 7),
                    on_trace_ready = profiler.export_chrome_tracing('./log'))

                prof.start()
                for iter in range(10):
                    #train()
                    prof.step()
                prof.stop()
        """
        # Close the event of the step that just finished.
        if self.record_event:
            self.record_event.end()
            self.record_event = None
        self.previous_state = self.current_state
        self.step_num += 1
        self.current_state = self.scheduler(self.step_num)
        self._trigger_action()
        # Open the event for the step that starts now.
        self.record_event = RecordEvent(
            name="ProfileStep#{}".format(self.step_num),
            event_type=TracerEventType.ProfileStep)
        self.record_event.begin()

    def _trigger_action(self):
        r"""
        Drive the underlying profiler according to the transition from
        self.previous_state to self.current_state.

        When the previous state is RECORD_AND_RETURN, the collected result is
        stored in self.profiler_result and self.on_trace_ready is invoked.
        """
        recording_states = (ProfilerState.RECORD,
                            ProfilerState.RECORD_AND_RETURN)

        if self.previous_state == ProfilerState.CLOSED:
            if self.current_state == ProfilerState.READY:  # CLOSED -> READY
                self.profiler.prepare()
            elif self.current_state in recording_states:  # CLOSED -> RECORD / RECORD_AND_RETURN
                self.profiler.prepare()
                self.profiler.start()

        elif self.previous_state == ProfilerState.READY:
            if self.current_state == ProfilerState.CLOSED:  # READY -> CLOSED
                warn(
                    "Improper schedule: READY->CLOSED, profiler will start and stop without saving data"
                )
                self.profiler.start()
                self.profiler.stop()
            elif self.current_state in recording_states:  # READY -> RECORD / RECORD_AND_RETURN
                self.profiler.start()

        elif self.previous_state == ProfilerState.RECORD:
            if self.current_state == ProfilerState.CLOSED:  # RECORD -> CLOSED
                warn(
                    "Improper schedule: RECORD->CLOSED, profiler will not saving data"
                )
                self.profiler.stop()
            elif self.current_state == ProfilerState.READY:  # RECORD -> READY
                warn(
                    "Improper schedule: RECORD->READY, profiler will stop and re-prepare"
                )
                self.profiler.stop()
                self.profiler.prepare()
            # RECORD -> RECORD and RECORD -> RECORD_AND_RETURN: keep recording.

        else:
            assert self.previous_state == ProfilerState.RECORD_AND_RETURN
            if self.current_state == ProfilerState.CLOSED:  # RECORD_AND_RETURN -> CLOSED
                self.profiler_result = self.profiler.stop()
            elif self.current_state == ProfilerState.READY:  # RECORD_AND_RETURN -> READY
                self.profiler_result = self.profiler.stop()
                self.profiler.prepare()
            elif self.current_state in recording_states:  # RECORD_AND_RETURN -> RECORD / RECORD_AND_RETURN
                self.profiler_result = self.profiler.stop()
                self.profiler.prepare()
                self.profiler.start()
            if self.on_trace_ready:
                self.on_trace_ready(self)

    def export(self, path="", format="json"):
        r"""
        Exports the tracing data to file.
        Nothing is written if no profiling result has been collected yet.

        Args:
            path(str): file path of the output.
            format(str, optional): output format, can be chosen from ['json', 'pb'], 'json' for chrome tracing and 'pb' for protobuf, default value is "json".


        Examples:
            .. code-block:: python
                :name: code-example7

                # required: gpu
                import paddle.profiler as profiler
                prof = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (3, 7))
                prof.start()
                for iter in range(10):
                    #train()
                    prof.step()
                prof.stop()
                prof.export(path="./profiler_data.json", format="json")
        """
        if self.profiler_result:
            self.profiler_result.save(path, format)

    def summary(self,
                sorted_by=SortedKeys.CPUTotal,
                op_detail=True,
                thread_sep=False,
                time_unit='ms'):
        r"""
        Print the Summary table. Currently support overview, model, distributed, operator, memory manipulation and userdefined summary.
        Nothing is printed if no profiling result has been collected yet.

        Args:
            sorted_by( :ref:`SortedKeys <api_paddle_profiler_SortedKeys>` , optional): how to rank the op table items, default value is SortedKeys.CPUTotal.
            op_detail(bool, optional): expand each operator detail information, default value is True.
            thread_sep(bool, optional): print op table each thread, default value is False.
            time_unit(str, optional): time unit for display, can be chosen from ['s', 'ms', 'us', 'ns'], default value is 'ms'.

        Examples:
            .. code-block:: python
                :name: code-example8

                # required: gpu
                import paddle.profiler as profiler
                prof = profiler.Profiler(
                    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
                    scheduler = (3, 7),
                    on_trace_ready = profiler.export_chrome_tracing('./log'))
                prof.start()
                for iter in range(10):
                    #train()
                    prof.step()
                prof.stop()
                prof.summary(sorted_by=profiler.SortedKeys.CPUTotal, op_detail=True, thread_sep=False, time_unit='ms')
        """
        if self.profiler_result:
            statistic_data = StatisticData(
                self.profiler_result.get_data(),
                self.profiler_result.get_extra_info())
            print(
                _build_table(
                    statistic_data,
                    sorted_by=sorted_by,
                    op_detail=op_detail,
                    thread_sep=thread_sep,
                    time_unit=time_unit))