# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import subprocess
import sys
import unittest

import numpy as np

import paddle


class TestNanInf(unittest.TestCase):
    """Run the NaN/Inf checker scripts in a child interpreter.

    Each test appends a script name to ``self._python_interp``, launches the
    resulting command with ``self.env`` as the child's environment, and
    expects the text ``There are NAN or INF`` somewhere in the child's
    combined stdout/stderr. Subclasses customise a run by editing
    ``self.env`` in their own ``setUp``.
    """

    def setUp(self):
        """Prepare the interpreter command prefix and the child environment."""
        self._python_interp = sys.executable
        if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
            # With WITH_COVERAGE=ON the child is started through coverage.
            self._python_interp += " -m coverage run --branch -p"

        # Work on a copy so edits made by tests never touch os.environ.
        self.env = os.environ.copy()

    def _build_command(self):
        """Return ``self._python_interp`` as an argument list.

        The interpreter path is kept as a single argument even when it
        contains spaces (for example under ``C:\\Program Files``); only the
        options and script name appended after it are split on whitespace.
        """
        interp = sys.executable
        cmd = self._python_interp
        if cmd.startswith(interp):
            return [interp] + cmd[len(interp):].split()
        # The prefix was replaced by something else: fall back to a plain
        # whitespace split.
        return cmd.split()

    def check_nan_inf(self):
        """Run the prepared command and require the NaN/Inf report.

        Raises:
            AssertionError: if ``There are NAN or INF`` appears in neither
                stdout nor stderr of the child process.
        """
        proc = subprocess.run(
            self._build_command(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=self.env,
        )
        out, err = proc.stdout, proc.stderr

        # Echo the raw child output so it shows up in the test log.
        print(out)
        print(err)

        # The captured streams are bytes, so the needle is a bytes literal.
        self.assertIn(b'There are NAN or INF', out + err)

    def test_nan_inf_in_static_mode(self):
        """The static-mode script must report NaN/Inf."""
        self._python_interp += " check_nan_inf_base.py"
        self.check_nan_inf()

    def test_nan_inf_in_dynamic_mode(self):
        """The dynamic-mode (dygraph) script must report NaN/Inf."""
        self._python_interp += " check_nan_inf_base_dygraph.py"
        self.check_nan_inf()


class TestNanInfEnv(TestNanInf):
    """Repeat the ``TestNanInf`` runs with the ``PADDLE_INF_NAN_SKIP_*``
    environment variables set for the child process.
    """

    def setUp(self):
        """Extend the base environment with the skip settings."""
        super().setUp()
        # Values must be plain ``str``: on Windows, passing anything else to
        # the child fails with
        # "TypeError: environment can only contain strings".
        # NOTE(review): the names suggest these exclude an op, a role and a
        # variable from the check; confirm against the checker itself.
        self.env["PADDLE_INF_NAN_SKIP_OP"] = "mul"
        self.env["PADDLE_INF_NAN_SKIP_ROLE"] = "loss"
        self.env["PADDLE_INF_NAN_SKIP_VAR"] = "elementwise_add:fc_0.tmp_1"
class TestCheckSkipEnv(TestNanInf):
    """Repeat the ``TestNanInf`` runs with the op check/skip list variables
    set in the child environment.
    """

    def setUp(self):
        """Extend the base environment with the check and skip op lists."""
        super().setUp()
        # Keep every value a plain ``str``; Windows rejects anything else
        # with "TypeError: environment can only contain strings".
        self.env.update(
            {
                "Paddle_check_nan_inf_op_list": "mean",
                "Paddle_skip_nan_inf_op_list": "elementwise_add",
            }
        )


class TestNanInfCheckResult(unittest.TestCase):
    """Check what Paddle reports when NaN/Inf checking is enabled.

    The tests compare the NaN/Inf counts found in Paddle's error message
    with a NumPy reference, check that the failing source line is echoed by
    a child script, and exercise the checker at several
    ``FLAGS_check_nan_inf_level`` settings.
    """

    def setUp(self):
        """Prepare the interpreter command prefix and the child environment."""
        self._python_interp = sys.executable
        if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
            # With WITH_COVERAGE=ON the child is started through coverage.
            self._python_interp += " -m coverage run --branch -p"

        # Work on a copy so edits made by tests never touch os.environ.
        self.env = os.environ.copy()

    def _build_command(self):
        """Return ``self._python_interp`` as an argument list.

        The interpreter path is kept as a single argument even when it
        contains spaces; only the text appended after it is split on
        whitespace.
        """
        interp = sys.executable
        cmd = self._python_interp
        if cmd.startswith(interp):
            return [interp] + cmd[len(interp):].split()
        return cmd.split()

    @staticmethod
    def _parse_nan_inf_counts(message):
        """Extract ``(num_nan, num_inf)`` from an error message.

        Looks for ``num_nan=<digits>`` and ``num_inf=<digits>``; when a key
        occurs several times the last occurrence wins, and a key that is
        absent counts as 0.
        """

        def last_count(key):
            found = re.findall(rf"{key}=(\d+)", message)
            return int(found[-1]) if found else 0

        return last_count("num_nan"), last_count("num_inf")

    def generate_inputs(self, shape, dtype="float32"):
        """Build random inputs that make ``log`` and division misbehave.

        Args:
            shape: shape of both returned arrays.
            dtype: NumPy dtype name both arrays are cast to.

        Returns:
            ``(x, y)``. ``x`` holds values from [-10, 10) multiplied
            elementwise by a random 0/1 mask, so it contains zeros and
            negative numbers; ``y`` holds random 0/1 values.
        """
        data = np.random.random(size=shape).astype(dtype)
        # Scale to [-10, 10), then zero out a random subset of the entries.
        x = (data * 20 - 10) * np.random.randint(
            low=0, high=2, size=shape
        ).astype(dtype)
        y = np.random.randint(low=0, high=2, size=shape).astype(dtype)
        return x, y

    def get_reference_num_nan_inf(self, x):
        """Count the NaN and Inf entries of ``np.log(x)``.

        Returns:
            ``(num_nan, num_inf)`` as NumPy integers.
        """
        # Invalid inputs are intentional here, so silence NumPy's warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            out = np.log(x)
        num_nan = np.sum(np.isnan(out))
        num_inf = np.sum(np.isinf(out))
        print(f"[reference] num_nan={num_nan}, num_inf={num_inf}")
        return num_nan, num_inf

    def get_num_nan_inf(self, x_np, use_cuda=True, add_assert=False):
        """Run ``paddle.log`` on ``x_np`` and read the counts Paddle reports.

        Args:
            x_np: NumPy input array.
            use_cuda: run on ``gpu:0`` when true, otherwise on ``cpu``.
            add_assert: raise ``AssertionError`` inside the guarded region
                when ``paddle.log`` itself did not raise.

        Returns:
            ``(num_nan, num_inf)`` parsed from the caught exception's
            message, or ``(0, 0)`` when nothing was raised or the message
            carries no counts.
        """
        num_nan = 0
        num_inf = 0
        try:
            paddle.device.set_device("gpu:0" if use_cuda else "cpu")
            x = paddle.to_tensor(x_np)
            out = paddle.log(x)
            sys.stdout.flush()
            if add_assert:
                # NOTE(review): this AssertionError is caught by the handler
                # below and yields (0, 0); a missing report is therefore only
                # detected through the count comparison done by the caller.
                raise AssertionError()
        except Exception as e:
            # Cannot catch the log in CUDA kernel.
            num_nan, num_inf = self._parse_nan_inf_counts(str(e))
            print(f"[paddle] num_nan={num_nan}, num_inf={num_inf}")
        return num_nan, num_inf

    def test_num_nan_inf(self):
        """Counts reported on CPU must equal the NumPy reference."""

        def _check_num_nan_inf(use_cuda):
            shape = [32, 32]
            x_np, _ = self.generate_inputs(shape)
            num_nan_np, num_inf_np = self.get_reference_num_nan_inf(x_np)
            add_assert = (num_nan_np + num_inf_np) > 0
            num_nan, num_inf = self.get_num_nan_inf(x_np, use_cuda, add_assert)
            # Counts are only compared on CPU; the GPU run is just executed.
            if not use_cuda:
                self.assertEqual(num_nan, num_nan_np)
                self.assertEqual(num_inf, num_inf_np)

        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 0}
        )
        _check_num_nan_inf(use_cuda=False)
        if paddle.is_compiled_with_cuda():
            _check_num_nan_inf(use_cuda=True)

    def test_check_stack(self):
        """The child's output must echo the source line of the failing op."""
        self._python_interp += " check_nan_inf_backward_stack.py"
        proc = subprocess.run(
            self._build_command(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=self.env,
        )
        out, err = proc.stdout, proc.stderr

        # Echo the raw child output so it shows up in the test log.
        print(out)
        print(err)

        # The captured streams are bytes, so the needle is a bytes literal.
        self.assertIn(b' z = paddle.pow(x, y)', out + err)

    def check_nan_inf_level(self, use_cuda, dtype):
        """Evaluate ``log(x * 1e6) / y`` on the chosen device.

        Nothing is asserted; the call only has to run under the flags the
        calling test has set.
        """
        shape = [8, 8]
        x_np, y_np = self.generate_inputs(shape, dtype)

        paddle.device.set_device("gpu:0" if use_cuda else "cpu")
        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        out = paddle.log(x * 1e6) / y

    def test_check_nan_inf_level_float32(self):
        """Run the float32 case at check level 2 on CPU and, if built, GPU."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 2}
        )
        self.check_nan_inf_level(use_cuda=False, dtype="float32")
        if paddle.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float32")

    def test_check_nan_inf_level_float16(self):
        """Run the float16 case at check level 3; GPU builds only."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
        )
        if paddle.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float16")

    def test_check_numerics(self):
        """Call ``check_numerics`` directly on float16 tensors."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
        )
        if paddle.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float16")

        shape = [8, 8]
        x_np, y_np = self.generate_inputs(shape, "float16")
        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        paddle.fluid.core.check_numerics("check_numerics", x)
        paddle.fluid.core.check_numerics("check_numerics", y)

# Run the test cases above when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()