未验证 提交 49074859 编写于 作者: Z zhangkaihuo 提交者: GitHub

Add compare accuracy api (#53430)

上级 45ce0ad5
...@@ -25,6 +25,7 @@ from .grad_scaler import AmpScaler # noqa: F401 ...@@ -25,6 +25,7 @@ from .grad_scaler import AmpScaler # noqa: F401
from .grad_scaler import OptimizerState # noqa: F401 from .grad_scaler import OptimizerState # noqa: F401
from . import debugging # noqa: F401 from . import debugging # noqa: F401
from . import accuracy_compare # noqa: F401
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.framework import ( from paddle.fluid.framework import (
......
此差异已折叠。
...@@ -30,6 +30,7 @@ __all__ = [ ...@@ -30,6 +30,7 @@ __all__ = [
"collect_operator_stats", "collect_operator_stats",
"enable_tensor_checker", "enable_tensor_checker",
"disable_tensor_checker", "disable_tensor_checker",
"compare_accuracy",
] ]
...@@ -424,6 +425,67 @@ def collect_operator_stats(): ...@@ -424,6 +425,67 @@ def collect_operator_stats():
disable_operator_stats_collection() disable_operator_stats_collection()
def compare_accuracy(
    dump_path,
    another_dump_path,
    output_filename,
    loss_scale=1,
    dump_all_tensors=False,
):
    r"""
    This is a precision comparison tool that can be used to compare log data of float16 and float32.

    It validates ``dump_all_tensors`` and then forwards all arguments to
    ``paddle.amp.accuracy_compare.compare_accuracy``.

    Args:
        dump_path(str): The path of the running log, such as the log for execution using the fp32 type.
        another_dump_path(str): The path of another running log, such as the log for execution using the fp16 type.
        output_filename(str): The excel file name of the comparison output.
        loss_scale(float, optional): The loss_scale during the training phase. Default is 1.
        dump_all_tensors(bool, optional): Dump all tensors. It is currently not supported. Default is False.

    Raises:
        AssertionError: If ``dump_all_tensors`` is anything other than ``False``.

    Examples:

        .. code-block:: python

            import paddle
            from paddle.fluid import core
            try:
                import xlsxwriter as xlw
            except ImportError:
                import subprocess

                subprocess.check_call(
                    ['python', '-m', 'pip', 'install', 'xlsxwriter==3.0.9']
                )
                import xlsxwriter as xlw
            if core.is_compiled_with_cuda():
                paddle.set_flags(
                    {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
                )
                path = "workerlog_log_dir"
                paddle.fluid.core.set_nan_inf_debug_path(path)
                x = paddle.to_tensor(
                    [2, 3, 4, 0], dtype="float32"
                )
                y = paddle.to_tensor(
                    [1, 5, 2, 0], dtype="float32"
                )
                z1 = x + y
                out_excel = "compary_accuracy_out_excel.csv"
                paddle.amp.debugging.compare_accuracy(
                    path, path, out_excel, loss_scale=1, dump_all_tensors=False
                )
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, which would let an unsupported request
    # through and silently replace it with the hard-coded False below. The
    # exception type and message stay the same as before for callers.
    if dump_all_tensors is not False:
        raise AssertionError("It is currently not supported.")
    paddle.amp.accuracy_compare.compare_accuracy(
        dump_path,
        another_dump_path,
        output_filename,
        loss_scale,
        dump_all_tensors=False,
    )
def enable_tensor_checker(checker_config): def enable_tensor_checker(checker_config):
""" """
The enable_tensor_checker(checker_config) function enables model-level accuracy checking and is used in combination with disables_tensor_checker() to achieve model-level precision checking by checking the output Tensors of all operators within the specified range. The enable_tensor_checker(checker_config) function enables model-level accuracy checking and is used in combination with disables_tensor_checker() to achieve model-level precision checking by checking the output Tensors of all operators within the specified range.
......
...@@ -16,3 +16,4 @@ autograd==1.4 ...@@ -16,3 +16,4 @@ autograd==1.4
librosa==0.8.1 librosa==0.8.1
parameterized parameterized
wandb>=0.13 wandb>=0.13
xlsxwriter==3.0.9
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
from paddle.fluid import core
@unittest.skipIf(
    not core.is_compiled_with_cuda(), "not support cpu TestCompareAccuracyApi"
)
class TestCompareAccuracyApi(unittest.TestCase):
    """Smoke tests for ``paddle.amp.debugging.compare_accuracy``.

    The whole class is skipped unless Paddle is compiled with CUDA, because
    ``calc`` places its tensors on ``CUDAPlace(0)``.
    """

    def setUp(self):
        # Enable the nan/inf checker before every test case. Previously only
        # `test` set these flags, so `test2` ran with them enabled only when
        # `test` had already executed in the same process.
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
        )

    def calc(self, path, dtype):
        """Run one add and one multiply in ``dtype`` with ``path`` as the debug path.

        Args:
            path(str): Directory passed to ``set_nan_inf_debug_path``.
            dtype(str): Tensor dtype, e.g. ``"float32"`` or ``"float16"``.
        """
        paddle.fluid.core.set_nan_inf_debug_path(path)
        x = paddle.to_tensor(
            [2000, 3000, 4, 0], place=core.CUDAPlace(0), dtype=dtype
        )
        y = paddle.to_tensor(
            [100, 500, 2, 10000], place=core.CUDAPlace(0), dtype=dtype
        )
        # normal
        z1 = x + y
        # inf: products such as 2000 * 100 exceed the float16 maximum (65504)
        z2 = x * y

    def test(self):
        """Compare a float32 run against a float16 run of the same ops."""
        fp32_path = "workerlog_fp32_log_dir"
        fp16_path = "workerlog_fp16_log_dir"
        self.calc(fp32_path, "float32")
        self.calc(fp16_path, "float16")
        out_excel = "compary_accuracy_out_excel.csv"
        paddle.amp.debugging.compare_accuracy(
            fp32_path,
            fp16_path,
            out_excel,
            loss_scale=1,
            dump_all_tensors=False,
        )

    def test2(self):
        """Compare a float32 run against a path this test never writes to."""
        fp32_path = "workerlog_fp32_log_dir"
        # No calc() call is made for this path, so the comparison runs
        # against a log directory that this test case does not populate.
        fp16_path = "workerlog_fp16_null_log_dir"
        self.calc(fp32_path, "float32")
        out_excel = "compary_accuracy_out_excel_2.csv"
        paddle.amp.debugging.compare_accuracy(
            fp32_path,
            fp16_path,
            out_excel,
            loss_scale=1,
            dump_all_tensors=False,
        )
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册