未验证 提交 0b89cb1d 编写于 作者: J JYChen 提交者: GitHub

remove legacy profiler (#52624)

* remove legacy profiler

* rm test_parallel_executor_profiler
上级 bc9956cc
......@@ -58,7 +58,8 @@ class CostModel:
exe = paddle.static.Executor(place)
exe.run(startup_program)
paddle.fluid.profiler.start_profiler("All")
p = paddle.profiler.Profiler()
p.start()
exe.run(main_program, feed={"X": x}, fetch_list=[])
cost_model = core.CostModel()
......
......@@ -75,7 +75,7 @@ from .core import (
CustomPlace,
)
from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
from . import profiler
from . import unique_name
from . import compiler
from .compiler import *
......@@ -130,7 +130,6 @@ __all__ = (
'ParamAttr',
'WeightNormParamAttr',
'DataFeeder',
'profiler',
'unique_name',
'Scope',
'_cuda_synchronize',
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import core
from .wrapped_decorator import signature_safe_contextmanager
import os
import sys
from paddle.utils.deprecated import deprecated
# Names exported by a star-import of this module.
# NOTE(review): ``npu_profiler`` and ``_nvprof_range`` are defined below but
# are not listed here.
__all__ = [
    'cuda_profiler',
    'reset_profiler',
    'profiler',
    'start_profiler',
    'stop_profiler',
]
# Option strings, presumably meant for the nvprof tool (TODO confirm).
# No function visible in this module reads this list.
NVPROF_CONFIG = [
    "gpustarttimestamp",
    "gpuendtimestamp",
    "gridsize3d",
    "threadblocksize",
    "streamid",
    "enableonstart 0",
    "conckerneltrace",
]
@deprecated(
    since="2.3.0",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
@signature_safe_contextmanager
def cuda_profiler(output_file, output_mode=None, config=None):
    """
    Abandoned API kept only as a stub.

    The body never yields and unconditionally raises ``RuntimeError``; none
    of the arguments is read. The error message points users to
    `paddle.utils.profiler.start_profiler` and
    `paddle.utils.profiler.stop_profiler`, documented at:

    <https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/start_profiler_en.html#start-profiler>
    <https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/stop_profiler_en.html#stop-profiler>
    <https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/performance_improving/analysis_tools/timeline_en.html>

    Raises:
        RuntimeError: Always.
    """
    # Adjacent literals are concatenated by the parser, so the resulting
    # message is one single string.
    abandoned_notice = (
        "API cuda_profiler has been abandoned. If you have relevant requirements, you can use `paddle.utils.profiler.start_profiler` and `paddle.utils.profiler.stop_profiler`."
        "\nThe relevant reference documents are as follows:"
        "\n<https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/start_profiler_en.html#start-profiler>"
        "\n<https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/stop_profiler_en.html#stop-profiler>"
        "\n<https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/performance_improving/analysis_tools/timeline_en.html>"
    )
    raise RuntimeError(abandoned_notice)
@signature_safe_contextmanager
def npu_profiler(output_file, config=None):
    """
    The NPU profiler.

    This function profiles an NPU program for the duration of the ``with``
    body. Profiling is initialised with `output_file`, started with the
    given (or a freshly created) config, and always stopped and finalised
    when the body exits, even if it raises. After getting the profiling
    result file, users can use
    `tools provided by Ascend <https://support.huaweicloud.com/tg-Inference-cann/atlasprofiling_16_0006.html>`_
    to load this output file to visualize results.

    Args:
        output_file (str) : The output file name, the result will be
            written into this file. It should be absolute path.
        config (list<str>, optional) : NPU profile config. When it is falsy
            (``None`` or empty), ``core.npu_prof_create_config()`` supplies
            one. For more details, please refer to
            `User Guide <https://support.huaweicloud.com/tg-Inference-cann/atlasprofiling_16_0006.html>`_ .

    Examples:

        .. code-block:: python

            import paddle.fluid as fluid
            import paddle.fluid.profiler as profiler
            import numpy as np
            import paddle

            epoc = 8
            dshape = [4, 3, 28, 28]
            data = paddle.static.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
            conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

            place = fluid.NPUPlace(0)
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            output_file = 'npu.txt'
            with profiler.npu_profiler(output_file) as npu_prof:
                for i in range(epoc):
                    input = np.random.random(dshape).astype('float32')
                    exe.run(fluid.default_main_program(), feed={'data': input})
            # then use NPU profiler tools to load this output file
            # to visualize results.
    """
    # TODO: support config in python.
    active_config = config or core.npu_prof_create_config()

    core.npu_prof_init(output_file)
    # Enables profiler collection by the active NPU profiling tool.
    core.npu_prof_start(active_config)
    try:
        yield
    finally:
        # Disables profiler collection.
        core.npu_prof_stop(active_config)
        core.npu_prof_finalize()
@deprecated(
    since="2.3.0",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
def reset_profiler():
    """
    Discard the time records collected so far.

    Thin wrapper around ``core.reset_profiler()``. Intended for use between
    `fluid.profiler.start_profiler` and `fluid.profiler.stop_profiler`, or
    inside a `fluid.profiler.profiler` block.

    Examples:

        .. code-block:: python

            # required: gpu
            import paddle.fluid as fluid
            import paddle.fluid.profiler as profiler

            with profiler.profiler('CPU', 'total', '/tmp/profile'):
                for iter in range(10):
                    if iter == 2:
                        profiler.reset_profiler()
                    # ...
    """
    core.reset_profiler()
@deprecated(
    since="2.3.0",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
def start_profiler(state, tracer_option='Default'):
    """
    Turn the legacy profiler on.

    Users can pair `fluid.profiler.start_profiler` with
    `fluid.profiler.stop_profiler`, which is equivalent to using the
    `fluid.profiler.profiler` context manager. When
    ``core.is_profiler_enabled()`` already reports an active profiler the
    call returns immediately, before any argument is validated.

    Args:
        state (str) : The profiling state, which should be one of 'CPU', 'GPU'
            or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
            both CPU and GPU; 'All' means profiling both CPU and GPU, and
            generates timeline as well.
        tracer_option (str, optional) : One of ['Default', 'OpDetail', 'AllOpDetail'].
            It controls the profile level and how detailed the printed result is.
            `Default` prints the profiling result per Op type, `OpDetail` prints
            the detailed result of the Op types (such as compute and data
            transform), and `AllOpDetail` prints the detailed result per op name,
            same as `OpDetail`.

    Raises:
        ValueError: If `state` is not in ['CPU', 'GPU', 'All'] or `tracer_option`
            is not in ['Default', 'OpDetail', 'AllOpDetail'].

    Examples:

        .. code-block:: python

            # required: gpu
            import paddle.fluid as fluid
            import paddle.fluid.profiler as profiler

            profiler.start_profiler('GPU')
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                # run one iteration here
            profiler.stop_profiler('total', '/tmp/profile')

            profiler.start_profiler('GPU', "OpDetail")
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                # run one iteration here
            profiler.stop_profiler('total', '/tmp/profile')
    """
    if core.is_profiler_enabled():
        # A session is already running; leave it untouched.
        return

    # Validate and translate `state` first, then `tracer_option`.
    if state not in ('CPU', 'GPU', 'All'):
        raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
    state_enum = core.ProfilerState
    prof_state = (
        state_enum.kCUDA
        if state == "GPU"
        else state_enum.kCPU
        if state == "CPU"
        else state_enum.kAll
    )

    if tracer_option not in ('Default', 'OpDetail', 'AllOpDetail'):
        raise ValueError(
            "tracer option must be 'Default', 'OpDetail', 'AllOpDetail'."
        )
    option_enum = core.TracerOption
    prof_tracer_option = (
        option_enum.kDefault
        if tracer_option == "Default"
        else option_enum.kOpDetail
        if tracer_option == "OpDetail"
        else option_enum.kAllOpDetail
    )

    core.set_tracer_option(prof_tracer_option)
    core.enable_profiler(prof_state)
@deprecated(
    since="2.3.0",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
    """
    Turn the legacy profiler off.

    Users can pair `fluid.profiler.start_profiler` with
    `fluid.profiler.stop_profiler`, which is equivalent to using the
    `fluid.profiler.profiler` context manager. When
    ``core.is_profiler_enabled()`` reports that no profiler is running the
    call returns immediately, before any argument is validated.

    Args:
        sorted_key (str, optional) : The order of profiling results, which
            should be one of None, 'calls', 'total', 'max', 'min' or 'ave'.
            Default is None, means the profiling results will be printed
            in the order of first end time of events.
            The `calls` means sorting by the number of calls.
            The `total` means sorting by the total execution time.
            The `max` means sorting by the maximum execution time.
            The `min` means sorting by the minimum execution time.
            The `ave` means sorting by the average execution time.
        profile_path (str, optional) : If state == 'All', it will generate timeline,
            and write it into `profile_path`. The default profile_path is `/tmp/profile`.

    Raises:
        ValueError: If `sorted_key` is not None and not in
            ['calls', 'total', 'max', 'min', 'ave'].

    Examples:

        .. code-block:: python

            # required: gpu
            import paddle.fluid as fluid
            import paddle.fluid.profiler as profiler

            profiler.start_profiler('GPU')
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                # run one iteration here
            profiler.stop_profiler('total', '/tmp/profile')
    """
    if not core.is_profiler_enabled():
        return

    # None is spelled 'default' internally; the literal 'default' is
    # therefore accepted as well.
    if sorted_key is None:
        sorted_key = 'default'
    if sorted_key not in ('default', 'calls', 'total', 'max', 'min', 'ave'):
        raise ValueError(
            "The sorted_key must be None or in 'calls', 'total', "
            "'max', 'min' and 'ave'"
        )

    sorting_keys = core.EventSortingKey
    key_map = dict(
        default=sorting_keys.kDefault,
        calls=sorting_keys.kCalls,
        total=sorting_keys.kTotal,
        max=sorting_keys.kMax,
        min=sorting_keys.kMin,
        ave=sorting_keys.kAve,
    )
    # TODO(qingqing) : redirect C++ ostream to Python stream.
    # with core.ostream_redirect(stdout=True, stderr=True):
    core.disable_profiler(key_map[sorted_key], profile_path)
@deprecated(
    since="2.3.0",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
@signature_safe_contextmanager
def profiler(
    state, sorted_key=None, profile_path='/tmp/profile', tracer_option='Default'
):
    """
    The profiler interface, usable for both CPU and GPU programs.

    Calls `start_profiler(state, tracer_option)` on entry and
    `stop_profiler(sorted_key, profile_path)` on exit; the stop call runs
    even when the ``with`` body raises.

    Args:
        state (str) : The profiling state, which should be one of 'CPU', 'GPU'
            or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
            both CPU and GPU; 'All' means profiling both CPU and GPU, and
            generates timeline as well.
        sorted_key (str, optional) : The order of profiling results, which
            should be one of None, 'calls', 'total', 'max', 'min' or 'ave'.
            Default is None, means the profiling results will be printed
            in the order of first end time of events.
            The `calls` means sorting by the number of calls.
            The `total` means sorting by the total execution time.
            The `max` means sorting by the maximum execution time.
            The `min` means sorting by the minimum execution time.
            The `ave` means sorting by the average execution time.
        profile_path (str, optional) : If state == 'All', it will generate timeline,
            and write it into `profile_path`. The default profile_path is `/tmp/profile`.
        tracer_option (str, optional) : One of ['Default', 'OpDetail', 'AllOpDetail'].
            It controls the profile level and how detailed the printed result is.
            `Default` prints the profiling result per Op type, `OpDetail` prints
            the detailed result of the Op types (such as compute and data
            transform), and `AllOpDetail` prints the detailed result per op name,
            same as `OpDetail`.

    Raises:
        ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
            not in ['calls', 'total', 'max', 'min', 'ave'].

    Examples:

        .. code-block:: python

            # required: gpu
            import paddle.fluid as fluid
            import paddle.fluid.profiler as profiler
            import numpy as np
            import paddle
            paddle.enable_static()

            epoc = 8
            dshape = [4, 3, 28, 28]
            data = paddle.static.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
            conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            with profiler.profiler('CPU', 'total', '/tmp/profile', 'Default') as prof:
                for i in range(epoc):
                    input = np.random.random(dshape).astype('float32')
                    exe.run(fluid.default_main_program(), feed={'data': input})

    Examples Results:

        .. code-block:: text

            #### Examples Results ####
            #### 1) sorted_key = 'total', 'calls', 'max', 'min', 'ave' ####
            # The only difference in 5 sorted_key results is the following sentence:
            # "Sorted by number of xxx in descending order in the same thread."
            # The reason is that in this example, above 5 columns are already sorted.
            ------------------------->     Profiling Report     <-------------------------

            Place: CPU
            Time unit: ms
            Sorted by total time in descending order in the same thread
            #Sorted by number of calls in descending order in the same thread
            #Sorted by number of max in descending order in the same thread
            #Sorted by number of min in descending order in the same thread
            #Sorted by number of avg in descending order in the same thread

            Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
            thread0::conv2d             8           129.406     0.304303    127.076     16.1758     0.983319
            thread0::elementwise_add    8           2.11865     0.193486    0.525592    0.264832    0.016099
            thread0::feed               8           0.076649    0.006834    0.024616    0.00958112  0.000582432

            #### 2) sorted_key = None  ####
            # Since the profiling results are printed in the order of first end time of Ops,
            # the printed order is feed->conv2d->elementwise_add
            ------------------------->     Profiling Report     <-------------------------

            Place: CPU
            Time unit: ms
            Sorted by event first end time in descending order in the same thread

            Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
            thread0::feed               8           0.077419    0.006608    0.023349    0.00967738  0.00775934
            thread0::conv2d             8           7.93456     0.291385    5.63342     0.99182     0.795243
            thread0::elementwise_add    8           1.96555     0.191884    0.518004    0.245693    0.196998
    """
    start_profiler(state=state, tracer_option=tracer_option)
    try:
        yield
    finally:
        # Always stop, so a failing body does not leave the profiler running.
        stop_profiler(sorted_key=sorted_key, profile_path=profile_path)
@signature_safe_contextmanager
def _nvprof_range(iter_id, start, end, exit_after_prof=True):
    '''
    A range profiler interface (not public yet).

    Wraps one iteration of a loop. When ``iter_id == start`` it calls
    ``core.nvprof_start()`` and ``core.nvprof_enable_record_event()``; for
    every ``iter_id >= start`` it pushes a range named ``str(iter_id)`` via
    ``core.nvprof_nvtx_push`` and pops it again when the body finishes.
    When ``iter_id == end`` it calls ``core.nvprof_stop()`` and, unless
    `exit_after_prof` is false, terminates the process with ``sys.exit()``.

    Args:
        iter_id (int): Index of the current iteration.
        start (int): Iteration at which profiling is started.
        end (int): Iteration at which profiling is stopped.
        exit_after_prof (bool, optional): Call ``sys.exit()`` after stopping
            the profiler at iteration `end`. Default is True.

    Raises:
        SystemExit: When ``iter_id == end`` and `exit_after_prof` is true.

    Examples:

        .. code-block:: python

            model = Model()
            for i in range(max_iter):
                with paddle.fluid.profiler._nvprof_range(i, 10, 20):
                    out = model(in)
    '''
    # Tracks whether THIS call pushed a range, so push and pop stay balanced:
    # no pop without a push (iter_id < start), no push left open
    # (iter_id >= end).
    pushed = False
    try:
        if iter_id == start:
            core.nvprof_start()
            core.nvprof_enable_record_event()
        if iter_id >= start:
            core.nvprof_nvtx_push(str(iter_id))
            pushed = True
        yield
    finally:
        if pushed:
            core.nvprof_nvtx_pop()
        if iter_id == end:
            core.nvprof_stop()
            if exit_after_prof:
                sys.exit()
......@@ -400,7 +400,6 @@ endfunction()
list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type)
list(REMOVE_ITEM TEST_OPS test_fetch_lod_tensor_array)
list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_profiler)
list(REMOVE_ITEM TEST_OPS test_data_norm_op)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer_auto_growth)
......@@ -694,17 +693,6 @@ if(WITH_DISTRIBUTE)
endif()
endif()
# The profiler test hangs randomly on Linux with CUDA 10.1 or 10.2;
# see https://github.com/PaddlePaddle/Paddle/issues/29082 for details.
# We suspect a bug in CUDA 10.1/10.2 on Linux, since this unit test is
# stable with CUDA 11.2 and with CUDA 10.2 on Windows (windows-ci pipeline).
if(NOT (LINUX AND CUDA_VERSION LESS 11.0))
py_test_modules(test_parallel_executor_profiler MODULES
test_parallel_executor_profiler)
set_tests_properties(test_parallel_executor_profiler
PROPERTIES LABELS "RUN_TYPE=DIST")
set_tests_properties(test_parallel_executor_profiler PROPERTIES TIMEOUT 120)
endif()
py_test_modules(test_parallel_executor_transformer MODULES
test_parallel_executor_transformer)
if(WIN32)
......
......@@ -197,17 +197,6 @@ class TestProfiler(unittest.TestCase):
prof.stop()
class TestNvprof(unittest.TestCase):
    """Smoke test that calls `paddle.fluid.profiler._nvprof_range`."""

    def test_nvprof(self):
        for i in range(10):
            # NOTE(review): the returned object is discarded and never
            # entered with ``with``, so presumably none of the profiling
            # hooks run here -- confirm whether that is intended.
            paddle.fluid.profiler._nvprof_range(i, 10, 20)
            x_value = np.random.randn(2, 3, 3)
            x = paddle.to_tensor(
                x_value, stop_gradient=False, place=paddle.CPUPlace()
            )
            # The result is not asserted on; the loop only has to finish
            # without raising.
            y = x / 2.0
class TestGetProfiler(unittest.TestCase):
def test_getprofiler(self):
config_content = '''
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
from paddle import fluid
from paddle.fluid import core
from paddle.fluid.tests.unittests.test_profiler import TestProfiler
# NCCL 2.7 decides to use shared memory while NCCL 2.6 didn't, hence causing the error.
# include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device
#
# Set the environment variable NCCL_SHM_DISABLE=1 to disable the Shared Memory (SHM) transport
# and force the use of P2P, which is the default transport of NCCL 2.6.
os.environ['NCCL_SHM_DISABLE'] = str(1)
class TestPEProfiler(TestProfiler):
    """Re-runs the inherited ``net_profiler`` check with ``use_parallel_executor=True``."""

    def test_cpu_profiler(self):
        """Profile on CPU with the 'Default' tracer option."""
        cpu_exe = fluid.Executor(fluid.CPUPlace())
        self.net_profiler(
            cpu_exe,
            state='CPU',
            tracer_option="Default",
            use_parallel_executor=True,
        )

    @unittest.skipIf(
        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
    )
    def test_cuda_profiler(self):
        """Profile state 'GPU' with the 'OpDetail' tracer option."""
        gpu_exe = fluid.Executor(fluid.CUDAPlace(0))
        self.net_profiler(
            gpu_exe,
            state='GPU',
            tracer_option="OpDetail",
            use_parallel_executor=True,
        )

    @unittest.skipIf(
        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
    )
    def test_all_profiler(self):
        """Profile state 'All' with the 'AllOpDetail' tracer option."""
        gpu_exe = fluid.Executor(fluid.CUDAPlace(0))
        self.net_profiler(
            gpu_exe,
            state='All',
            tracer_option="AllOpDetail",
            use_parallel_executor=True,
        )
if __name__ == '__main__':
unittest.main()
......@@ -12,219 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
import numpy as np
import paddle
from paddle import fluid, utils
from paddle.fluid import core, profiler
from paddle.fluid.proto.profiler import profiler_pb2
from paddle.utils.flops import flops
class TestProfiler(unittest.TestCase):
    """Runs a small static-graph network under the legacy and the utils profiler APIs."""

    @classmethod
    def setUpClass(cls):
        os.environ['CPU_NUM'] = str(4)

    def build_program(self, compile_program=True):
        """Build the network used by every test.

        Args:
            compile_program (bool): When true the main program is wrapped in
                ``fluid.compiler.CompiledProgram``; otherwise it is returned
                as built.

        Returns:
            tuple: ``(train_program, startup_program, avg_cost, batch_size,
            batch_acc)``.
        """
        startup_program = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            image = paddle.static.data(
                name='x', shape=[-1, 784], dtype='float32'
            )
            hidden1 = paddle.static.nn.fc(x=image, size=64, activation='relu')
            i = paddle.zeros(shape=[1], dtype='int64')
            counter = paddle.tensor.fill_constant(
                shape=[1], dtype='int64', value=0, force_cpu=True
            )
            until = paddle.tensor.fill_constant([1], dtype='int64', value=10)
            data_arr = paddle.tensor.array_write(hidden1, i)
            cond = paddle.less_than(x=counter, y=until)
            while_op = paddle.static.nn.control_flow.While(cond=cond)
            with while_op.block():
                hidden_n = paddle.static.nn.fc(
                    x=hidden1, size=64, activation='relu'
                )
                paddle.tensor.array_write(hidden_n, i, data_arr)
                paddle.increment(x=counter, value=1)
                paddle.assign(paddle.less_than(x=counter, y=until), cond)

            hidden_n = paddle.tensor.array_read(data_arr, i)
            hidden2 = paddle.static.nn.fc(
                x=hidden_n, size=64, activation='relu'
            )
            predict = paddle.static.nn.fc(
                x=hidden2, size=10, activation='softmax'
            )
            label = paddle.static.data(name='y', shape=[-1, 1], dtype='int64')
            cost = paddle.nn.functional.cross_entropy(
                input=predict, label=label, reduction='none', use_softmax=False
            )
            avg_cost = paddle.mean(cost)
            batch_size = paddle.tensor.create_tensor(dtype='int64')
            batch_acc = paddle.static.accuracy(
                input=predict, label=label, total=batch_size
            )

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_cost, startup_program=startup_program)

        if compile_program:
            # TODO(luotao): profiler tool may have bug with multi-thread parallel executor.
            # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
            train_program = fluid.compiler.CompiledProgram(main_program)
        else:
            train_program = main_program
        return train_program, startup_program, avg_cost, batch_size, batch_acc

    def get_profile_path(self):
        """Create (or truncate) ``<tempdir>/profile`` and return its path."""
        profile_path = os.path.join(tempfile.gettempdir(), "profile")
        # The context manager closes the handle instead of leaving that to
        # the garbage collector.
        with open(profile_path, "w") as profile_file:
            profile_file.write("")
        return profile_path

    def check_profile_result(self, profile_path):
        """Parse the profile at `profile_path` and sanity-check its events.

        An empty file is accepted without further checks.

        Raises:
            Exception: If a ``GPUKernel`` event has no ``detail_info`` and
                its name does not start with "MEM".
        """
        with open(profile_path, 'rb') as profile_file:
            data = profile_file.read()
        if len(data) > 0:
            profile_pb = profiler_pb2.Profile()
            profile_pb.ParseFromString(data)
            self.assertGreater(len(profile_pb.events), 0)
            for event in profile_pb.events:
                if event.type == profiler_pb2.Event.GPUKernel:
                    if not event.detail_info and not event.name.startswith(
                        "MEM"
                    ):
                        raise Exception(
                            "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
                            % event.name
                        )
                elif event.type == profiler_pb2.Event.CPU and (
                    event.name.startswith("Driver API")
                    or event.name.startswith("Runtime API")
                ):
                    print("Warning: unregister", event.name)

    def run_iter(self, exe, main_program, fetch_list):
        """Run `main_program` once on a random batch of 32 samples."""
        x = np.random.random((32, 784)).astype("float32")
        y = np.random.randint(0, 10, (32, 1)).astype("int64")
        exe.run(main_program, feed={'x': x, 'y': y}, fetch_list=fetch_list)

    def net_profiler(
        self,
        exe,
        state,
        tracer_option,
        batch_range=None,
        use_parallel_executor=False,
        use_new_api=False,
    ):
        """Run ten iterations of the network under a profiler.

        Args:
            exe: Executor used for the startup program and the iterations.
            state (str): Profiler state forwarded to the profiler.
            tracer_option (str): Tracer option / tracer level forwarded to
                the profiler.
            batch_range (list, optional): Batch range for the
                ``utils.Profiler`` path; ``[0, 10]`` is used when None.
            use_parallel_executor (bool): Forwarded to `build_program` as
                ``compile_program``.
            use_new_api (bool): Use ``utils.Profiler`` instead of
                ``profiler.profiler``.
        """
        (
            main_program,
            startup_program,
            avg_cost,
            batch_size,
            batch_acc,
        ) = self.build_program(compile_program=use_parallel_executor)
        exe.run(startup_program)

        profile_path = self.get_profile_path()
        if not use_new_api:
            with profiler.profiler(state, 'total', profile_path, tracer_option):
                for step in range(10):
                    if step == 2:
                        profiler.reset_profiler()
                    self.run_iter(
                        exe, main_program, [avg_cost, batch_acc, batch_size]
                    )
        else:
            options = utils.ProfilerOptions(
                options={
                    'state': state,
                    'sorted_key': 'total',
                    'tracer_level': tracer_option,
                    'batch_range': [0, 10]
                    if batch_range is None
                    else batch_range,
                    'profile_path': profile_path,
                }
            )
            with utils.Profiler(enabled=True, options=options):
                for step in range(10):
                    self.run_iter(
                        exe, main_program, [avg_cost, batch_acc, batch_size]
                    )
                    utils.get_profiler().record_step()
                    if batch_range is None and step == 2:
                        utils.get_profiler().reset()
        # TODO(luotao): check why nccl kernel in profile result.
        # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
        # self.check_profile_result(profile_path)

    def test_cpu_profiler(self):
        exe = fluid.Executor(fluid.CPUPlace())
        for use_new_api in [False, True]:
            self.net_profiler(
                exe,
                'CPU',
                "Default",
                batch_range=[5, 10],
                use_new_api=use_new_api,
            )

    @unittest.skipIf(
        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
    )
    def test_cuda_profiler(self):
        exe = fluid.Executor(fluid.CUDAPlace(0))
        for use_new_api in [False, True]:
            self.net_profiler(
                exe,
                'GPU',
                "OpDetail",
                batch_range=[0, 10],
                use_new_api=use_new_api,
            )

    @unittest.skipIf(
        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
    )
    def test_all_profiler(self):
        exe = fluid.Executor(fluid.CUDAPlace(0))
        for use_new_api in [False, True]:
            self.net_profiler(
                exe,
                'All',
                "AllOpDetail",
                batch_range=None,
                use_new_api=use_new_api,
            )
class TestProfilerAPIError(unittest.TestCase):
    """Checks option lookup on ``utils.ProfilerOptions`` and the current-profiler bookkeeping."""

    def test_errors(self):
        default_options = utils.ProfilerOptions()
        # Both paths are unset by default and read back as None.
        for path_option in ('profile_path', 'timeline_path'):
            self.assertIsNone(default_options[path_option])

        all_state_options = default_options.with_state('All')
        self.assertTrue(all_state_options['state'] == 'All')

        # Looking up an unknown option may raise ValueError, which is tolerated.
        try:
            print(all_state_options['test'])
        except ValueError:
            pass

        global_profiler = utils.get_profiler()
        with utils.Profiler(enabled=True) as prof:
            active_profiler = utils.get_profiler()
            self.assertTrue(active_profiler == prof)
            self.assertTrue(global_profiler != prof)
class TestFLOPSAPI(unittest.TestCase):
    """Checks the value `flops` returns for a 'relu' op."""

    def test_flops(self):
        input_shapes = {'X': [[12, 12]]}
        op_attrs = {'output': 4}
        relu_flops = flops('relu', input_shapes, op_attrs)
        self.assertTrue(relu_flops == 144)
......
......@@ -13,9 +13,6 @@
# limitations under the License.
from . import gast
from .profiler import ProfilerOptions # noqa: F401
from .profiler import Profiler # noqa: F401
from .profiler import get_profiler # noqa: F401
from .deprecated import deprecated # noqa: F401
from .lazy_import import try_import # noqa: F401
from .op_version import OpLastCheckpointChecker # noqa: F401
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import warnings
from ..fluid import core
from ..fluid.profiler import cuda_profiler # noqa: F401
from ..fluid.profiler import profiler # noqa: F401
from ..fluid.profiler import reset_profiler, start_profiler, stop_profiler
from .deprecated import deprecated
# Public names of this module: the three helpers defined below (`Profiler`,
# `get_profiler`, `ProfilerOptions`) plus the legacy functions imported above
# from ``..fluid.profiler``.
__all__ = [ # noqa
    'Profiler',
    'get_profiler',
    'ProfilerOptions',
    'cuda_profiler',
    'start_profiler',
    'profiler',
    'stop_profiler',
    'reset_profiler',
]
@deprecated(
    since="2.4.2",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
class ProfilerOptions:
    """Option container for the deprecated `Profiler`.

    Options live in the ``options`` dict. The string ``'none'`` is the stored
    placeholder for "not set" and is read back as ``None``.
    """

    def __init__(self, options=None):
        """Start from the defaults and apply the overrides in `options`.

        Args:
            options (dict, optional): Overrides. Only keys that exist in the
                defaults are considered, and a value of None is ignored.
        """
        self.options = {
            'state': 'All',
            'sorted_key': 'default',
            'tracer_level': 'Default',
            'batch_range': [0, sys.maxsize],
            'output_thread_detail': False,
            'profile_path': 'none',
            'timeline_path': 'none',
            'op_summary_path': 'none',
        }
        if options is None:
            return
        for known_key in self.options:
            if options.get(known_key) is not None:
                self.options[known_key] = options[known_key]

    # function to set one specified option
    def with_state(self, state):
        """Set the 'state' option and return this object."""
        self.options['state'] = state
        return self

    def __getitem__(self, name):
        """Return the value of option `name`.

        Returns:
            The stored value, or None when the stored value is the string
            ``'none'``.

        Raises:
            ValueError: If `name` is not a known option (or its stored value
                is None).
        """
        stored = self.options.get(name)
        if stored is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name
            )
        if isinstance(stored, str) and stored == 'none':
            return None
        return stored
# The profiler whose ``with`` block is currently active (or the one lazily
# created by `get_profiler`); None until one of those happens.
_current_profiler = None


@deprecated(
    since="2.4.2",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
class Profiler:
    """Context manager driving `start_profiler` / `stop_profiler` from `ProfilerOptions`."""

    def __init__(self, enabled=True, options=None):
        """
        Args:
            enabled (bool): When false, every method below is a no-op apart
                from the current-profiler bookkeeping in enter/exit.
            options (ProfilerOptions, optional): Options to use; a default
                `ProfilerOptions()` is created when None.
        """
        self.profiler_options = (
            options if options is not None else ProfilerOptions()
        )
        self.batch_id = 0
        self.enabled = enabled

    def __enter__(self):
        # record current profiler
        global _current_profiler
        self.previous_profiler = _current_profiler
        _current_profiler = self

        # Start right away only when the batch range begins at batch 0;
        # otherwise `record_step` starts it later.
        if self.enabled and self.profiler_options['batch_range'][0] == 0:
            self.start()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        # Restore whichever profiler was current before this block.
        global _current_profiler
        _current_profiler = self.previous_profiler

        if self.enabled:
            self.stop()

    def start(self):
        """Call `start_profiler`; any exception is turned into a warning."""
        if not self.enabled:
            return
        try:
            start_profiler(
                state=self.profiler_options['state'],
                tracer_option=self.profiler_options['tracer_level'],
            )
        except Exception as e:
            warnings.warn(
                "Profiler is not enabled becuase following exception:\n{}".format(
                    e
                )
            )

    def stop(self):
        """Call `stop_profiler`; any exception is turned into a warning."""
        if not self.enabled:
            return
        try:
            stop_profiler(
                sorted_key=self.profiler_options['sorted_key'],
                profile_path=self.profiler_options['profile_path'],
            )
        except Exception as e:
            warnings.warn(
                "Profiler is not disabled becuase following exception:\n{}".format(
                    e
                )
            )

    def reset(self):
        """Call `reset_profiler` when enabled and a profiler is running."""
        if self.enabled and core.is_profiler_enabled():
            reset_profiler()

    def record_step(self, change_profiler_status=True):
        """Count one finished batch and, if allowed, start/reset/stop on the range edges.

        Args:
            change_profiler_status (bool): When false only the batch counter
                is advanced.
        """
        if not self.enabled:
            return

        self.batch_id += 1
        if not change_profiler_status:
            return

        batch_range = self.profiler_options['batch_range']
        if self.batch_id == batch_range[0]:
            # Reaching the first batch of the range: drop earlier records if
            # a profiler is already running, otherwise start one.
            if core.is_profiler_enabled():
                self.reset()
            else:
                self.start()
        if self.batch_id == batch_range[1]:
            self.stop()
@deprecated(
    since="2.4.2",
    update_to="paddle.profiler.Profiler",
    level=1,
    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
)
def get_profiler():
    """Return the module-level current profiler.

    If none is set yet, a default `Profiler()` is created, stored as the
    current profiler, and returned.
    """
    global _current_profiler
    if _current_profiler is None:
        # First request with no active profiler: create and remember one.
        _current_profiler = Profiler()
    return _current_profiler
......@@ -263,7 +263,6 @@ def check_public_api():
paddle.text,
paddle.utils,
paddle.utils.download,
paddle.utils.profiler,
paddle.utils.cpp_extension,
paddle.sysconfig,
paddle.vision,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册