From 234b530d5274906d0f2966e1ec7907195f318a68 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Thu, 10 Oct 2019 22:23:11 +0800 Subject: [PATCH] refine profiler, name_scope document (#20431) * refine profiler document (#20326) * refine profiler document test=develop test=document_fix * update profiler document test=develop test=document_fix * refine profiler, name_scope document test=develop test=document_fix --- paddle/fluid/API.spec | 10 +-- python/paddle/fluid/framework.py | 41 +++++++--- python/paddle/fluid/profiler.py | 133 +++++++++++++++++++------------ 3 files changed, 118 insertions(+), 66 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index ebce930fe7c..e1049bd3603 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -10,7 +10,7 @@ paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_de paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f53890b2fb8c0642b6047e4fee2d6d58')) paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '853718df675e59aea7104f3d61bbf11d')) paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '78fb5c7f70ef76bcf4a1862c3f6b8191')) -paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '917d313881ff990de5fb18d98a9c7b42')) +paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '907a5f877206079d8e67ae69b06bb3ba')) paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ab9bd2079536114aa7c1488a489ee87f')) paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a7352a3dd39308fde4fbbf6421a4193d')) paddle.fluid.cuda_pinned_places 
(ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '567ac29567716fd8e7432b533337d529')) @@ -1088,11 +1088,11 @@ paddle.fluid.dygraph_grad_clip.GradClipByNorm ('paddle.fluid.dygraph_grad_clip.G paddle.fluid.dygraph_grad_clip.GradClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph_grad_clip.GradClipByGlobalNorm ('paddle.fluid.dygraph_grad_clip.GradClipByGlobalNorm', ('document', 'd1872377e7d7a5fe0dd2e8c42e4c9656')) paddle.fluid.dygraph_grad_clip.GradClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'max_global_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '4053b45953807a24e28027dc86829d6c')) +paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6ae5833bd2490c6a3bdcae0d31ce5ec5')) paddle.fluid.profiler.reset_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'fd1f25a7a06516ca9a1f4ab0783a4d70')) -paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'a2be24e028dffa06ab28cc55a27c59e4')) -paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '4c192ea399e6e80b1ab47a8265b022a5')) -paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'bc8628b859b04242200e48a458c971c4')) +paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, 
defaults=(None, '/tmp/profile')), ('document', '8e8d777eb0127876d7bdb6c421db7f5c')) +paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '9494b48e79a0e07b49017ba5a97800b6')) +paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', '10406b144bd8b5e01ea44301219f7fef')) paddle.fluid.unique_name.generate (ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None), ('document', '4d68cde4c4df8f1b8018620b4dc19b42')) paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '695a6e91afbcdbafac69a069038811be')) paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ead717d6d440a1eb11971695cd1727f4')) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index f174f6e3fbf..622eb247c7a 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -285,27 +285,46 @@ def name_scope(prefix=None): """ Generate hierarchical name prefix for the operators. - Note: This should only used for debugging and visualization purpose. - Don't use it for serious analysis such as graph/program transformations. + Note: + This should only be used for debugging and visualization purpose. + Don't use it for serious analysis such as graph/program transformations. Args: - prefix(str): prefix. + prefix(str, optional): prefix. Default is None. Examples: .. 
code-block:: python import paddle.fluid as fluid with fluid.name_scope("s1"): - a = fluid.layers.data(name='data', shape=[1], dtype='int32') - b = a + 1 - with fluid.name_scope("s2"): - c = b * 1 - with fluid.name_scope("s3"): - d = c / 1 + a = fluid.data(name='data', shape=[None, 1], dtype='int32') + b = a + 1 + with fluid.name_scope("s2"): + c = b * 1 + with fluid.name_scope("s3"): + d = c / 1 with fluid.name_scope("s1"): - f = fluid.layers.pow(d, 2.0) + f = fluid.layers.pow(d, 2.0) with fluid.name_scope("s4"): - g = f - 1 + g = f - 1 + + # Ops are created in the default main program. + for op in fluid.default_main_program().block(0).ops: + # elementwise_add is created in /s1/ + if op.type == 'elementwise_add': + assert op.desc.attr("op_namescope") == '/s1/' + # elementwise_mul is created in '/s1/s2/' + elif op.type == 'elementwise_mul': + assert op.desc.attr("op_namescope") == '/s1/s2/' + # elementwise_div is created in '/s1/s3/' + elif op.type == 'elementwise_div': + assert op.desc.attr("op_namescope") == '/s1/s3/' + # elementwise_sub is created in '/s4/' + elif op.type == 'elementwise_sub': + assert op.desc.attr("op_namescope") == '/s4/' + # pow is created in /s1_1/ + elif op.type == 'pow': + assert op.desc.attr("op_namescope") == '/s1_1/' """ # TODO(panyx0718): Only [0-9a-z]. # in dygraph we don't need namescope since it will cause mem leak diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index b0e168929b4..82b29e25fde 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -37,25 +37,27 @@ NVPROF_CONFIG = [ @signature_safe_contextmanager def cuda_profiler(output_file, output_mode=None, config=None): - """The CUDA profiler. + """ + The CUDA profiler. + This fuctions is used to profile CUDA program by CUDA runtime application programming interface. The profiling result will be written into - `output_file` with Key-Value pair format or Comma separated values format. 
- The user can set the output mode by `output_mode` argument and set the - counters/options for profiling by `config` argument. The default config - is ['gpustarttimestamp', 'gpuendtimestamp', 'gridsize3d', - 'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace']. - Then users can use NVIDIA Visual Profiler - (https://developer.nvidia.com/nvidia-visual-profiler) tools to load this - this output file to visualize results. + `output_file`. The users can set the output mode by `output_mode` argument + and set the nvidia profiling config by `config` argument. + + After getting the profiling result file, users can use + `NVIDIA Visual Profiler `_ + to load this output file to visualize results. Args: - output_file (string) : The output file name, the result will be + output_file (str) : The output file name, the result will be written into this file. - output_mode (string) : The output mode has Key-Value pair format and - Comma separated values format. It should be 'kvp' or 'csv'. - config (list of string) : The profiler options and counters can refer - to "Compute Command Line Profiler User Guide". + output_mode (str, optional) : The output mode has Key-Value pair format ('kvp') + and Comma separated values format ('csv', default). + config (list, optional) : Nvidia profile config. Default config is + ['gpustarttimestamp', 'gpuendtimestamp', 'gridsize3d', 'threadblocksize', + 'streamid', 'enableonstart 0', 'conckerneltrace']. For more details, please + refer to `Compute Command Line Profiler User Guide `_ . Raises: ValueError: If `output_mode` is not in ['kvp', 'csv']. 
@@ -70,7 +72,7 @@ def cuda_profiler(output_file, output_mode=None, config=None): epoc = 8 dshape = [4, 3, 28, 28] - data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32') + data = fluid.data(name='data', shape=[None, 3, 28, 28], dtype='float32') conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) place = fluid.CUDAPlace(0) @@ -127,13 +129,14 @@ def reset_profiler(): def start_profiler(state): """ Enable the profiler. Uers can use `fluid.profiler.start_profiler` and - `fluid.profiler.stop_profiler` to insert the code, except the usage of - `fluid.profiler.profiler` interface. + `fluid.profiler.stop_profiler` to profile, which is equal to the usage + of `fluid.profiler.profiler` interface. Args: - state (string) : The profiling state, which should be 'CPU', 'GPU' - or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling - GPU as well. 'All' also generates timeline. + state (str) : The profiling state, which should be one of 'CPU', 'GPU' + or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling + both CPU and GPU; 'All' means profiling both CPU and GPU, and + generates timeline as well. Raises: ValueError: If `state` is not in ['CPU', 'GPU', 'All']. @@ -168,21 +171,21 @@ def start_profiler(state): def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): """ Stop the profiler. Uers can use `fluid.profiler.start_profiler` and - `fluid.profiler.stop_profiler` to insert the code, except the usage of - `fluid.profiler.profiler` interface. + `fluid.profiler.stop_profiler` to profile, which is equal to the usage + of `fluid.profiler.profiler` interface. Args: - sorted_key (string) : If None, the profiling results will be printed - in the order of first end time of events. Otherwise, the profiling - results will be sorted by the this flag. This flag should be one - of 'calls', 'total', 'max', 'min' or 'ave'. 
+ sorted_key (str, optional) : The order of profiling results, which + should be one of None, 'calls', 'total', 'max', 'min' or 'ave'. + Default is None, means the profiling results will be printed + in the order of first end time of events. The `calls` means sorting by the number of calls. The `total` means sorting by the total execution time. The `max` means sorting by the maximum execution time. The `min` means sorting by the minimum execution time. The `ave` means sorting by the average execution time. - profile_path (string) : If state == 'All', it will write a profile - proto output file. + profile_path (str, optional) : If state == 'All', it will generate timeline, + and write it into `profile_path`. The default profile_path is `/tmp/profile`. Raises: ValueError: If `sorted_key` is not in @@ -223,34 +226,26 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): @signature_safe_contextmanager def profiler(state, sorted_key=None, profile_path='/tmp/profile'): - """The profiler interface. - Different from cuda_profiler, this profiler can be used to profile both CPU - and GPU program. By default, it records the CPU and GPU operator kernels, - if you want to profile other program, you can refer the profiling tutorial - to add more records in C++ code. - - If the state == 'All', a profile proto file will be written to - `profile_path`. This file records timeline information during the execution. - Then users can visualize this file to see the timeline, please refer - https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md + """ + The profiler interface. Different from `fluid.profiler.cuda_profiler`, + this profiler can be used to profile both CPU and GPU program. Args: - state (string) : The profiling state, which should be 'CPU' or 'GPU', - telling the profiler to use CPU timer or GPU timer for profiling. 
- Although users may have already specified the execution place - (CPUPlace/CUDAPlace) in the beginning, for flexibility the profiler - would not inherit this place. - sorted_key (string) : If None, the profiling results will be printed - in the order of first end time of events. Otherwise, the profiling - results will be sorted by the this flag. This flag should be one - of 'calls', 'total', 'max', 'min' or 'ave'. + state (str) : The profiling state, which should be one of 'CPU', 'GPU' + or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling + both CPU and GPU; 'All' means profiling both CPU and GPU, and + generates timeline as well. + sorted_key (str, optional) : The order of profiling results, which + should be one of None, 'calls', 'total', 'max', 'min' or 'ave'. + Default is None, means the profiling results will be printed + in the order of first end time of events. The `calls` means sorting by the number of calls. The `total` means sorting by the total execution time. The `max` means sorting by the maximum execution time. The `min` means sorting by the minimum execution time. The `ave` means sorting by the average execution time. - profile_path (string) : If state == 'All', it will write a profile - proto output file. + profile_path (str, optional) : If state == 'All', it will generate timeline, + and write it into `profile_path`. The default profile_path is `/tmp/profile`. Raises: ValueError: If `state` is not in ['CPU', 'GPU', 'All']. 
If `sorted_key` is @@ -266,7 +261,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): epoc = 8 dshape = [4, 3, 28, 28] - data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32') + data = fluid.data(name='data', shape=[None, 3, 28, 28], dtype='float32') conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) place = fluid.CPUPlace() @@ -277,6 +272,44 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): for i in range(epoc): input = np.random.random(dshape).astype('float32') exe.run(fluid.default_main_program(), feed={'data': input}) + + Examples Results: + + .. code-block:: text + + #### Examples Results #### + #### 1) sorted_key = 'total', 'calls', 'max', 'min', 'ave' #### + # The only difference in 5 sorted_key results is the following sentence: + # "Sorted by number of xxx in descending order in the same thread." + # The reason is that in this example, above 5 columns are already sorted. + -------------------------> Profiling Report <------------------------- + + Place: CPU + Time unit: ms + Sorted by total time in descending order in the same thread + #Sorted by number of calls in descending order in the same thread + #Sorted by number of max in descending order in the same thread + #Sorted by number of min in descending order in the same thread + #Sorted by number of avg in descending order in the same thread + + Event Calls Total Min. Max. Ave. Ratio. 
+ thread0::conv2d 8 129.406 0.304303 127.076 16.1758 0.983319 + thread0::elementwise_add 8 2.11865 0.193486 0.525592 0.264832 0.016099 + thread0::feed 8 0.076649 0.006834 0.024616 0.00958112 0.000582432 + + #### 2) sorted_key = None #### + # Since the profiling results are printed in the order of first end time of Ops, + # the printed order is feed->conv2d->elementwise_add + -------------------------> Profiling Report <------------------------- + + Place: CPU + Time unit: ms + Sorted by event first end time in descending order in the same thread + + Event Calls Total Min. Max. Ave. Ratio. + thread0::feed 8 0.077419 0.006608 0.023349 0.00967738 0.00775934 + thread0::conv2d 8 7.93456 0.291385 5.63342 0.99182 0.795243 + thread0::elementwise_add 8 1.96555 0.191884 0.518004 0.245693 0.196998 """ start_profiler(state) yield -- GitLab