remove legacy profiler (#52624)

* remove legacy profiler
* rm test_parallel_executor_profiler

remove legacy profiler (#52624)
* remove legacy profiler
* rm test_parallel_executor_profiler
0b89cb1d · JYChen · GitHub · bc9956cc · 0b89cb1d · 0b89cb1d
10 changed files
--- a/python/paddle/cost_model/cost_model.py
+++ b/python/paddle/cost_model/cost_model.py
@@ -58,7 +58,8 @@ class CostModel:
        exe = paddle.static.Executor(place)

        exe.run(startup_program)
-        paddle.fluid.profiler.start_profiler("All")
+        p = paddle.profiler.Profiler()
+        p.start()
        exe.run(main_program, feed={"X": x}, fetch_list=[])

        cost_model = core.CostModel()

--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -75,7 +75,7 @@ from .core import (
    CustomPlace,
 )
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
-from . import profiler
+
 from . import unique_name
 from . import compiler
 from .compiler import *
@@ -130,7 +130,6 @@ __all__ = (
        'ParamAttr',
        'WeightNormParamAttr',
        'DataFeeder',
-        'profiler',
        'unique_name',
        'Scope',
        '_cuda_synchronize',

--- a/python/paddle/fluid/profiler.py
+++ b/python/paddle/fluid/profiler.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import core
-from .wrapped_decorator import signature_safe_contextmanager
-import os
-import sys
-
-from paddle.utils.deprecated import deprecated
-
-__all__ = [
-    'cuda_profiler',
-    'reset_profiler',
-    'profiler',
-    'start_profiler',
-    'stop_profiler',
-]
-
-NVPROF_CONFIG = [
-    "gpustarttimestamp",
-    "gpuendtimestamp",
-    "gridsize3d",
-    "threadblocksize",
-    "streamid",
-    "enableonstart 0",
-    "conckerneltrace",
-]
-
-
-@deprecated(
-    since="2.3.0",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-@signature_safe_contextmanager
-def cuda_profiler(output_file, output_mode=None, config=None):
-    """
-    API cuda_profiler has been abandoned. If you have relevant requirements, you can use `paddle.utils.profiler.start_profiler` and `paddle.utils.profiler.stop_profiler`.
-    The relevant reference documents are as follows:
-    <https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/start_profiler_en.html#start-profiler>
-    <https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/stop_profiler_en.html#stop-profiler>
-    <https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/performance_improving/analysis_tools/timeline_en.html>
-    """
-    raise RuntimeError(
-        "API cuda_profiler has been abandoned. If you have relevant requirements, you can use `paddle.utils.profiler.start_profiler` and `paddle.utils.profiler.stop_profiler`.\nThe relevant reference documents are as follows:\n<https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/start_profiler_en.html#start-profiler>\n<https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/utils/profiler/stop_profiler_en.html#stop-profiler>\n<https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/performance_improving/analysis_tools/timeline_en.html>"
-    )
-
-
-@signature_safe_contextmanager
-def npu_profiler(output_file, config=None):
-    """
-    The NPU profiler.
-
-    This fuctions is used to profile NPU program by NPU runtime application
-    programming interface. The profiling result will be written into
-    `output_file`. The users can set set the NPU profiling config by `config` argument.
-
-    After getting the profiling result file, users can use
-    `tools provided by Ascend <https://support.huaweicloud.com/tg-Inference-cann/atlasprofiling_16_0006.html>`_
-    to load this output file to visualize results.
-
-    Args:
-        output_file (str) : The output file name, the result will be
-            written into this file. It should be absolute path.
-        config (list<str>, optional) : NPU profile config. For more details, please
-            refer to `User Guide <https://support.huaweicloud.com/tg-Inference-cann/atlasprofiling_16_0006.html>`_ .
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import paddle.fluid.profiler as profiler
-            import numpy as np
-            import paddle
-
-            epoc = 8
-            dshape = [4, 3, 28, 28]
-            data = paddle.static.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
-            conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
-
-            place = fluid.NPUPlace(0)
-            exe = fluid.Executor(place)
-            exe.run(fluid.default_startup_program())
-
-            output_file = 'npu.txt'
-            with profiler.npu_profiler(output_file) as npu_prof:
-                for i in range(epoc):
-                    input = np.random.random(dshape).astype('float32')
-                    exe.run(fluid.default_main_program(), feed={'data': input})
-            # then use  NPU profiler tools to load this output file
-            # to visualize results.
-    """
-    # TODO: support config in python.
-    if not config:
-        config = core.npu_prof_create_config()
-
-    core.npu_prof_init(output_file)
-    # Enables profiler collection by the active NPU profiling tool.
-    core.npu_prof_start(config)
-    try:
-        yield
-    # Disables profiler collection.
-    finally:
-        core.npu_prof_stop(config)
-        core.npu_prof_finalize()
-
-
-@deprecated(
-    since="2.3.0",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-def reset_profiler():
-    """
-    Clear the previous time record. It works for
-    `fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`,
-    and `fluid.profiler.profiler`.
-
-    Examples:
-
-        .. code-block:: python
-
-            # required: gpu
-            import paddle.fluid as fluid
-            import paddle.fluid.profiler as profiler
-            with profiler.profiler('CPU', 'total', '/tmp/profile'):
-                for iter in range(10):
-                    if iter == 2:
-                        profiler.reset_profiler()
-                    # ...
-    """
-    core.reset_profiler()
-
-
-@deprecated(
-    since="2.3.0",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-def start_profiler(state, tracer_option='Default'):
-    """
-    Enable the profiler. Uers can use `fluid.profiler.start_profiler` and
-    `fluid.profiler.stop_profiler` to profile, which is equal to the usage
-    of `fluid.profiler.profiler` interface.
-
-    Args:
-        state (str) : The profiling state, which should be one of 'CPU', 'GPU'
-            or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
-            both CPU and GPU; 'All' means profiling both CPU and GPU, and
-            generates timeline as well.
-        tracer_option (str, optional) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
-            can control the profile level and print the different level profile result. `Default` option print
-            the different Op type profiling result and the `OpDetail` option print the detail profiling
-            result of different op types such as compute and data transform, `AllOpDetail` option
-            print the detail profiling result of different op name same as `OpDetail`.
-
-    Raises:
-        ValueError: If `state` is not in ['CPU', 'GPU', 'All'] or `tracer_option`
-            is not in ['Default', 'OpDetail', 'AllOpDetail'].
-
-    Examples:
-
-        .. code-block:: python
-
-            # required: gpu
-            import paddle.fluid as fluid
-            import paddle.fluid.profiler as profiler
-
-            profiler.start_profiler('GPU')
-            for iter in range(10):
-                if iter == 2:
-                    profiler.reset_profiler()
-                # except each iteration
-            profiler.stop_profiler('total', '/tmp/profile')
-
-            profiler.start_profiler('GPU', "OpDetail")
-            for iter in range(10):
-                if iter == 2:
-                    profiler.reset_profiler()
-                # except each iteration
-            profiler.stop_profiler('total', '/tmp/profile')
-    """
-    if core.is_profiler_enabled():
-        return
-    if state not in ['CPU', 'GPU', "All"]:
-        raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
-    if state == "GPU":
-        prof_state = core.ProfilerState.kCUDA
-    elif state == "CPU":
-        prof_state = core.ProfilerState.kCPU
-    else:
-        prof_state = core.ProfilerState.kAll
-
-    if tracer_option not in ['Default', 'OpDetail', 'AllOpDetail']:
-        raise ValueError(
-            "tracer option must be 'Default', 'OpDetail', 'AllOpDetail'."
-        )
-    if tracer_option == "Default":
-        prof_tracer_option = core.TracerOption.kDefault
-    elif tracer_option == "OpDetail":
-        prof_tracer_option = core.TracerOption.kOpDetail
-    else:
-        prof_tracer_option = core.TracerOption.kAllOpDetail
-
-    core.set_tracer_option(prof_tracer_option)
-    core.enable_profiler(prof_state)
-
-
-@deprecated(
-    since="2.3.0",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
-    """
-    Stop the profiler. Uers can use `fluid.profiler.start_profiler` and
-    `fluid.profiler.stop_profiler` to profile, which is equal to the usage
-    of `fluid.profiler.profiler` interface.
-
-    Args:
-        sorted_key (str, optional) : The order of profiling results, which
-            should be one of None, 'calls', 'total', 'max', 'min' or 'ave'.
-            Default is None, means the profiling results will be printed
-            in the order of first end time of events.
-            The `calls` means sorting by the number of calls.
-            The `total` means sorting by the total execution time.
-            The `max` means sorting by the maximum execution time.
-            The `min` means sorting by the minimum execution time.
-            The `ave` means sorting by the average execution time.
-            and write it into `profile_path`. The default profile_path is `/tmp/profile`.
-        profile_path (str, optional) : If state == 'All', it will generate timeline,
-
-    Raises:
-        ValueError: If `sorted_key` is not in
-            ['calls', 'total', 'max', 'min', 'ave'].
-
-    Examples:
-
-        .. code-block:: python
-
-            # required: gpu
-            import paddle.fluid as fluid
-            import paddle.fluid.profiler as profiler
-
-            profiler.start_profiler('GPU')
-            for iter in range(10):
-                if iter == 2:
-                    profiler.reset_profiler()
-                # except each iteration
-            profiler.stop_profiler('total', '/tmp/profile')
-    """
-    if not core.is_profiler_enabled():
-        return
-    sorted_key = 'default' if sorted_key is None else sorted_key
-    if sorted_key not in ['default', 'calls', 'total', 'max', 'min', 'ave']:
-        raise ValueError(
-            "The sorted_key must be None or in 'calls', 'total', "
-            "'max', 'min' and 'ave'"
-        )
-    key_map = {
-        'default': core.EventSortingKey.kDefault,
-        'calls': core.EventSortingKey.kCalls,
-        'total': core.EventSortingKey.kTotal,
-        'max': core.EventSortingKey.kMax,
-        'min': core.EventSortingKey.kMin,
-        'ave': core.EventSortingKey.kAve,
-    }
-    # TODO(qingqing) : redirect C++ ostream to Python stream.
-    # with core.ostream_redirect(stdout=True, stderr=True):
-    core.disable_profiler(key_map[sorted_key], profile_path)
-
-
-@deprecated(
-    since="2.3.0",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-@signature_safe_contextmanager
-def profiler(
-    state, sorted_key=None, profile_path='/tmp/profile', tracer_option='Default'
-):
-    """
-    The profiler interface. This profiler can be used to profile both CPU and GPU program.
-
-    Args:
-        state (str) : The profiling state, which should be one of 'CPU', 'GPU'
-            or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
-            both CPU and GPU; 'All' means profiling both CPU and GPU, and
-            generates timeline as well.
-        sorted_key (str, optional) : The order of profiling results, which
-            should be one of None, 'calls', 'total', 'max', 'min' or 'ave'.
-            Default is None, means the profiling results will be printed
-            in the order of first end time of events.
-            The `calls` means sorting by the number of calls.
-            The `total` means sorting by the total execution time.
-            The `max` means sorting by the maximum execution time.
-            The `min` means sorting by the minimum execution time.
-            The `ave` means sorting by the average execution time.
-        profile_path (str, optional) : If state == 'All', it will generate timeline,
-            and write it into `profile_path`. The default profile_path is `/tmp/profile`.
-        tracer_option (str, optional) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
-            can control the profile level and print the different level profile result. `Default` option print
-            the different Op type profiling result and the `OpDetail` option print the detail profiling
-            result of different op types such as compute and data transform, `AllOpDetail` option
-            print the detail profiling result of different op name same as `OpDetail`.
-
-    Raises:
-        ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
-            not in ['calls', 'total', 'max', 'min', 'ave'].
-
-    Examples:
-
-        .. code-block:: python
-
-            # required: gpu
-            import paddle.fluid as fluid
-            import paddle.fluid.profiler as profiler
-            import numpy as np
-            import paddle
-            paddle.enable_static()
-
-            epoc = 8
-            dshape = [4, 3, 28, 28]
-            data = paddle.static.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
-            conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(fluid.default_startup_program())
-
-            with profiler.profiler('CPU', 'total', '/tmp/profile', 'Default') as prof:
-                for i in range(epoc):
-                    input = np.random.random(dshape).astype('float32')
-                    exe.run(fluid.default_main_program(), feed={'data': input})
-
-    Examples Results:
-
-        .. code-block:: text
-
-            #### Examples Results ####
-            #### 1) sorted_key = 'total', 'calls', 'max', 'min', 'ave' ####
-            # The only difference in 5 sorted_key results is the following sentence:
-            # "Sorted by number of xxx in descending order in the same thread."
-            # The reason is that in this example, above 5 columns are already sorted.
-            ------------------------->     Profiling Report     <-------------------------
-
-            Place: CPU
-            Time unit: ms
-            Sorted by total time in descending order in the same thread
-            #Sorted by number of calls in descending order in the same thread
-            #Sorted by number of max in descending order in the same thread
-            #Sorted by number of min in descending order in the same thread
-            #Sorted by number of avg in descending order in the same thread
-
-            Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
-            thread0::conv2d             8           129.406     0.304303    127.076     16.1758     0.983319
-            thread0::elementwise_add    8           2.11865     0.193486    0.525592    0.264832    0.016099
-            thread0::feed               8           0.076649    0.006834    0.024616    0.00958112  0.000582432
-
-            #### 2) sorted_key = None  ####
-            # Since the profiling results are printed in the order of first end time of Ops,
-            # the printed order is feed->conv2d->elementwise_add
-            ------------------------->     Profiling Report     <-------------------------
-
-            Place: CPU
-            Time unit: ms
-            Sorted by event first end time in descending order in the same thread
-
-            Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
-            thread0::feed               8           0.077419    0.006608    0.023349    0.00967738  0.00775934
-            thread0::conv2d             8           7.93456     0.291385    5.63342     0.99182     0.795243
-            thread0::elementwise_add    8           1.96555     0.191884    0.518004    0.245693    0.196998
-    """
-    start_profiler(state, tracer_option)
-    try:
-        yield
-    finally:
-        stop_profiler(sorted_key, profile_path)
-
-
-@signature_safe_contextmanager
-def _nvprof_range(iter_id, start, end, exit_after_prof=True):
-    '''
-    A range profiler interface (not public yet).
-
-    Examples:
-
-        .. code-block:: python
-
-            model = Model()
-            for i in range(max_iter):
-                paddle.fluid.profiler._nvprof_range(i, 10, 20):
-                    out = model(in)
-    '''
-    try:
-        if iter_id == start:
-            core.nvprof_start()
-            core.nvprof_enable_record_event()
-        if iter_id >= start:
-            core.nvprof_nvtx_push(str(iter_id))
-        yield
-    finally:
-        if iter_id < end:
-            core.nvprof_nvtx_pop()
-        if iter_id == end:
-            core.nvprof_stop()
-            if exit_after_prof:
-                sys.exit()
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -400,7 +400,6 @@ endfunction()
 list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type)
 list(REMOVE_ITEM TEST_OPS test_fetch_lod_tensor_array)
 list(REMOVE_ITEM TEST_OPS test_warpctc_op)
-list(REMOVE_ITEM TEST_OPS test_parallel_executor_profiler)
 list(REMOVE_ITEM TEST_OPS test_data_norm_op)
 list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
 list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer_auto_growth)
@@ -694,17 +693,6 @@ if(WITH_DISTRIBUTE)
  endif()
 endif()

-# profiler will random hang in linux cuda 10.1 or 10.2
-# see https://github.com/PaddlePaddle/Paddle/issues/29082 for details.
-# We guess there are some bugs in linux cuda 10.1 or 10.2,
-# since this unittest is stable in cuda 11.2 and 10.2 (windows-ci pipeline) now.
-if(NOT (LINUX AND CUDA_VERSION LESS 11.0))
-  py_test_modules(test_parallel_executor_profiler MODULES
-                  test_parallel_executor_profiler)
-  set_tests_properties(test_parallel_executor_profiler
-                       PROPERTIES LABELS "RUN_TYPE=DIST")
-  set_tests_properties(test_parallel_executor_profiler PROPERTIES TIMEOUT 120)
-endif()
 py_test_modules(test_parallel_executor_transformer MODULES
                test_parallel_executor_transformer)
 if(WIN32)

--- a/python/paddle/fluid/tests/unittests/test_newprofiler.py
+++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py
@@ -197,17 +197,6 @@ class TestProfiler(unittest.TestCase):
        prof.stop()


-class TestNvprof(unittest.TestCase):
-    def test_nvprof(self):
-        for i in range(10):
-            paddle.fluid.profiler._nvprof_range(i, 10, 20)
-            x_value = np.random.randn(2, 3, 3)
-            x = paddle.to_tensor(
-                x_value, stop_gradient=False, place=paddle.CPUPlace()
-            )
-            y = x / 2.0
-
-
 class TestGetProfiler(unittest.TestCase):
    def test_getprofiler(self):
        config_content = '''

--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import unittest
-
-from paddle import fluid
-from paddle.fluid import core
-from paddle.fluid.tests.unittests.test_profiler import TestProfiler
-
-# NCCL 2.7 decides to use shared memory while NCCL 2.6 didn't, hence causing the error.
-# include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device
-#
-# Set environment variables NCCL_SHM_DISABLE=1 to disables the Shared Memory (SHM) transports
-# and force to use P2P which is the default transports way of NCCL2.6.
-os.environ['NCCL_SHM_DISABLE'] = str(1)
-
-
-class TestPEProfiler(TestProfiler):
-    def test_cpu_profiler(self):
-        exe = fluid.Executor(fluid.CPUPlace())
-        self.net_profiler(exe, 'CPU', "Default", use_parallel_executor=True)
-
-    @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
-    )
-    def test_cuda_profiler(self):
-        exe = fluid.Executor(fluid.CUDAPlace(0))
-        self.net_profiler(exe, 'GPU', "OpDetail", use_parallel_executor=True)
-
-    @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
-    )
-    def test_all_profiler(self):
-        exe = fluid.Executor(fluid.CUDAPlace(0))
-        self.net_profiler(exe, 'All', "AllOpDetail", use_parallel_executor=True)
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_profiler.py
+++ b/python/paddle/fluid/tests/unittests/test_profiler.py
@@ -12,219 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import os
-import tempfile
 import unittest

-import numpy as np
-
 import paddle
-from paddle import fluid, utils
-from paddle.fluid import core, profiler
-from paddle.fluid.proto.profiler import profiler_pb2
 from paddle.utils.flops import flops


-class TestProfiler(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        os.environ['CPU_NUM'] = str(4)
-
-    def build_program(self, compile_program=True):
-        startup_program = fluid.Program()
-        main_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            image = paddle.static.data(
-                name='x', shape=[-1, 784], dtype='float32'
-            )
-            hidden1 = paddle.static.nn.fc(x=image, size=64, activation='relu')
-            i = paddle.zeros(shape=[1], dtype='int64')
-            counter = paddle.tensor.fill_constant(
-                shape=[1], dtype='int64', value=0, force_cpu=True
-            )
-            until = paddle.tensor.fill_constant([1], dtype='int64', value=10)
-            data_arr = paddle.tensor.array_write(hidden1, i)
-            cond = paddle.less_than(x=counter, y=until)
-            while_op = paddle.static.nn.control_flow.While(cond=cond)
-            with while_op.block():
-                hidden_n = paddle.static.nn.fc(
-                    x=hidden1, size=64, activation='relu'
-                )
-                paddle.tensor.array_write(hidden_n, i, data_arr)
-                paddle.increment(x=counter, value=1)
-                paddle.assign(paddle.less_than(x=counter, y=until), cond)
-
-            hidden_n = paddle.tensor.array_read(data_arr, i)
-            hidden2 = paddle.static.nn.fc(
-                x=hidden_n, size=64, activation='relu'
-            )
-            predict = paddle.static.nn.fc(
-                x=hidden2, size=10, activation='softmax'
-            )
-            label = paddle.static.data(name='y', shape=[-1, 1], dtype='int64')
-            cost = paddle.nn.functional.cross_entropy(
-                input=predict, label=label, reduction='none', use_softmax=False
-            )
-            avg_cost = paddle.mean(cost)
-            batch_size = paddle.tensor.create_tensor(dtype='int64')
-            batch_acc = paddle.static.accuracy(
-                input=predict, label=label, total=batch_size
-            )
-
-        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
-        opts = optimizer.minimize(avg_cost, startup_program=startup_program)
-
-        if compile_program:
-            # TODO(luotao): profiler tool may have bug with multi-thread parallel executor.
-            # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
-            train_program = fluid.compiler.CompiledProgram(main_program)
-        else:
-            train_program = main_program
-        return train_program, startup_program, avg_cost, batch_size, batch_acc
-
-    def get_profile_path(self):
-        profile_path = os.path.join(tempfile.gettempdir(), "profile")
-        open(profile_path, "w").write("")
-        return profile_path
-
-    def check_profile_result(self, profile_path):
-        data = open(profile_path, 'rb').read()
-        if len(data) > 0:
-            profile_pb = profiler_pb2.Profile()
-            profile_pb.ParseFromString(data)
-            self.assertGreater(len(profile_pb.events), 0)
-            for event in profile_pb.events:
-                if event.type == profiler_pb2.Event.GPUKernel:
-                    if not event.detail_info and not event.name.startswith(
-                        "MEM"
-                    ):
-                        raise Exception(
-                            "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
-                            % event.name
-                        )
-                elif event.type == profiler_pb2.Event.CPU and (
-                    event.name.startswith("Driver API")
-                    or event.name.startswith("Runtime API")
-                ):
-                    print("Warning: unregister", event.name)
-
-    def run_iter(self, exe, main_program, fetch_list):
-        x = np.random.random((32, 784)).astype("float32")
-        y = np.random.randint(0, 10, (32, 1)).astype("int64")
-        outs = exe.run(
-            main_program, feed={'x': x, 'y': y}, fetch_list=fetch_list
-        )
-
-    def net_profiler(
-        self,
-        exe,
-        state,
-        tracer_option,
-        batch_range=None,
-        use_parallel_executor=False,
-        use_new_api=False,
-    ):
-        (
-            main_program,
-            startup_program,
-            avg_cost,
-            batch_size,
-            batch_acc,
-        ) = self.build_program(compile_program=use_parallel_executor)
-        exe.run(startup_program)
-
-        profile_path = self.get_profile_path()
-        if not use_new_api:
-            with profiler.profiler(state, 'total', profile_path, tracer_option):
-                for iter in range(10):
-                    if iter == 2:
-                        profiler.reset_profiler()
-                    self.run_iter(
-                        exe, main_program, [avg_cost, batch_acc, batch_size]
-                    )
-        else:
-            options = utils.ProfilerOptions(
-                options={
-                    'state': state,
-                    'sorted_key': 'total',
-                    'tracer_level': tracer_option,
-                    'batch_range': [0, 10]
-                    if batch_range is None
-                    else batch_range,
-                    'profile_path': profile_path,
-                }
-            )
-            with utils.Profiler(enabled=True, options=options) as prof:
-                for iter in range(10):
-                    self.run_iter(
-                        exe, main_program, [avg_cost, batch_acc, batch_size]
-                    )
-                    utils.get_profiler().record_step()
-                    if batch_range is None and iter == 2:
-                        utils.get_profiler().reset()
-        # TODO(luotao): check why nccl kernel in profile result.
-        # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
-        # self.check_profile_result(profile_path)
-
-    def test_cpu_profiler(self):
-        exe = fluid.Executor(fluid.CPUPlace())
-        for use_new_api in [False, True]:
-            self.net_profiler(
-                exe,
-                'CPU',
-                "Default",
-                batch_range=[5, 10],
-                use_new_api=use_new_api,
-            )
-
-    @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
-    )
-    def test_cuda_profiler(self):
-        exe = fluid.Executor(fluid.CUDAPlace(0))
-        for use_new_api in [False, True]:
-            self.net_profiler(
-                exe,
-                'GPU',
-                "OpDetail",
-                batch_range=[0, 10],
-                use_new_api=use_new_api,
-            )
-
-    @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "profiler is enabled only with GPU"
-    )
-    def test_all_profiler(self):
-        exe = fluid.Executor(fluid.CUDAPlace(0))
-        for use_new_api in [False, True]:
-            self.net_profiler(
-                exe,
-                'All',
-                "AllOpDetail",
-                batch_range=None,
-                use_new_api=use_new_api,
-            )
-
-
-class TestProfilerAPIError(unittest.TestCase):
-    def test_errors(self):
-        options = utils.ProfilerOptions()
-        self.assertIsNone(options['profile_path'])
-        self.assertIsNone(options['timeline_path'])
-
-        options = options.with_state('All')
-        self.assertTrue(options['state'] == 'All')
-        try:
-            print(options['test'])
-        except ValueError:
-            pass
-
-        global_profiler = utils.get_profiler()
-        with utils.Profiler(enabled=True) as prof:
-            self.assertTrue(utils.get_profiler() == prof)
-            self.assertTrue(global_profiler != prof)
-
-
 class TestFLOPSAPI(unittest.TestCase):
    def test_flops(self):
        self.assertTrue(flops('relu', {'X': [[12, 12]]}, {'output': 4}) == 144)

--- a/python/paddle/utils/__init__.py
+++ b/python/paddle/utils/__init__.py
@@ -13,9 +13,6 @@
 # limitations under the License.

 from . import gast
-from .profiler import ProfilerOptions  # noqa: F401
-from .profiler import Profiler  # noqa: F401
-from .profiler import get_profiler  # noqa: F401
 from .deprecated import deprecated  # noqa: F401
 from .lazy_import import try_import  # noqa: F401
 from .op_version import OpLastCheckpointChecker  # noqa: F401

--- a/python/paddle/utils/profiler.py
+++ b/python/paddle/utils/profiler.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import warnings
-
-from ..fluid import core
-from ..fluid.profiler import cuda_profiler  # noqa: F401
-from ..fluid.profiler import profiler  # noqa: F401
-from ..fluid.profiler import reset_profiler, start_profiler, stop_profiler
-from .deprecated import deprecated
-
-__all__ = [  # noqa
-    'Profiler',
-    'get_profiler',
-    'ProfilerOptions',
-    'cuda_profiler',
-    'start_profiler',
-    'profiler',
-    'stop_profiler',
-    'reset_profiler',
-]
-
-
-@deprecated(
-    since="2.4.2",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-class ProfilerOptions:
-    def __init__(self, options=None):
-        self.options = {
-            'state': 'All',
-            'sorted_key': 'default',
-            'tracer_level': 'Default',
-            'batch_range': [0, sys.maxsize],
-            'output_thread_detail': False,
-            'profile_path': 'none',
-            'timeline_path': 'none',
-            'op_summary_path': 'none',
-        }
-        if options is not None:
-            for key in self.options.keys():
-                if options.get(key, None) is not None:
-                    self.options[key] = options[key]
-
-    # function to set one specified option
-    def with_state(self, state):
-        self.options['state'] = state
-        return self
-
-    def __getitem__(self, name):
-        if self.options.get(name, None) is None:
-            raise ValueError(
-                "ProfilerOptions does not have an option named %s." % name
-            )
-        else:
-            if (
-                isinstance(self.options[name], str)
-                and self.options[name] == 'none'
-            ):
-                return None
-            else:
-                return self.options[name]
-
-
-_current_profiler = None
-
-
-@deprecated(
-    since="2.4.2",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-class Profiler:
-    def __init__(self, enabled=True, options=None):
-        if options is not None:
-            self.profiler_options = options
-        else:
-            self.profiler_options = ProfilerOptions()
-        self.batch_id = 0
-        self.enabled = enabled
-
-    def __enter__(self):
-        # record current profiler
-        global _current_profiler
-        self.previous_profiler = _current_profiler
-        _current_profiler = self
-
-        if self.enabled:
-            if self.profiler_options['batch_range'][0] == 0:
-                self.start()
-        return self
-
-    def __exit__(self, exception_type, exception_value, traceback):
-        global _current_profiler
-        _current_profiler = self.previous_profiler
-
-        if self.enabled:
-            self.stop()
-
-    def start(self):
-        if self.enabled:
-            try:
-                start_profiler(
-                    state=self.profiler_options['state'],
-                    tracer_option=self.profiler_options['tracer_level'],
-                )
-            except Exception as e:
-                warnings.warn(
-                    "Profiler is not enabled becuase following exception:\n{}".format(
-                        e
-                    )
-                )
-
-    def stop(self):
-        if self.enabled:
-            try:
-                stop_profiler(
-                    sorted_key=self.profiler_options['sorted_key'],
-                    profile_path=self.profiler_options['profile_path'],
-                )
-            except Exception as e:
-                warnings.warn(
-                    "Profiler is not disabled becuase following exception:\n{}".format(
-                        e
-                    )
-                )
-
-    def reset(self):
-        if self.enabled and core.is_profiler_enabled():
-            reset_profiler()
-
-    def record_step(self, change_profiler_status=True):
-        if not self.enabled:
-            return
-        self.batch_id = self.batch_id + 1
-        if change_profiler_status:
-            if self.batch_id == self.profiler_options['batch_range'][0]:
-                if core.is_profiler_enabled():
-                    self.reset()
-                else:
-                    self.start()
-
-            if self.batch_id == self.profiler_options['batch_range'][1]:
-                self.stop()
-
-
-@deprecated(
-    since="2.4.2",
-    update_to="paddle.profiler.Profiler",
-    level=1,
-    reason="Please use new profiler tool, this profiler tool is no longer maintained.",
-)
-def get_profiler():
-    global _current_profiler
-    if _current_profiler is None:
-        _current_profiler = Profiler()
-    return _current_profiler
--- a/tools/print_signatures.py
+++ b/tools/print_signatures.py
@@ -263,7 +263,6 @@ def check_public_api():
        paddle.text,
        paddle.utils,
        paddle.utils.download,
-        paddle.utils.profiler,
        paddle.utils.cpp_extension,
        paddle.sysconfig,
        paddle.vision,