# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import subprocess
import sys
import unittest

import numpy as np

import paddle


def _split_command(cmd):
    """Turn a space-joined command string into an argv list.

    The test classes build their command as ``sys.executable + " args..."``.
    Splitting that string naively on spaces breaks when the interpreter path
    itself contains spaces, so a leading ``sys.executable`` is kept as a
    single argument and only the remainder is split.

    Args:
        cmd: Command string, normally starting with ``sys.executable``.

    Returns:
        A list of arguments suitable for ``subprocess``.
    """
    interp = sys.executable
    if interp and cmd.startswith(interp):
        rest = cmd[len(interp):]
        # Only treat the prefix as the interpreter when it ends on an
        # argument boundary (end of string or whitespace).
        if not rest or rest[0].isspace():
            return [interp, *rest.split()]
    return cmd.split()


def _run_and_capture(cmd, env):
    """Run ``cmd`` in a subprocess and return stdout and stderr concatenated.

    Both streams are also printed so they show up in the test log.

    Args:
        cmd: Space-joined command string (see ``_split_command``).
        env: Environment mapping passed to the child process.

    Returns:
        ``bytes`` holding the child's stdout followed by its stderr.
    """
    proc = subprocess.run(
        _split_command(cmd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=env,
        check=False,  # the child is expected to fail; only its output matters
    )
    print(proc.stdout)
    print(proc.stderr)
    return proc.stdout + proc.stderr


class TestNanInf(unittest.TestCase):
    """Run the NaN/Inf checker scripts in a subprocess and inspect the output."""

    def setUp(self):
        """Prepare the interpreter command string and a copy of the environment."""
        self._python_interp = sys.executable
        if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
            self._python_interp += " -m coverage run --branch -p"

        self.env = os.environ.copy()

    def check_nan_inf(self):
        """Run ``self._python_interp`` and assert the NaN/Inf message was emitted."""
        output = _run_and_capture(self._python_interp, self.env)

        # The captured output is bytes, so the needle must be bytes as well.
        self.assertIn(b'There are NAN or INF', output)

    def test_nan_inf_in_static_mode(self):
        """Check the message produced by ``check_nan_inf_base.py``."""
        self._python_interp += " check_nan_inf_base.py"
        self.check_nan_inf()

    def test_nan_inf_in_dynamic_mode(self):
        """Check the message produced by ``check_nan_inf_base_dygraph.py``."""
        self._python_interp += " check_nan_inf_base_dygraph.py"
        self.check_nan_inf()


class TestNanInfEnv(TestNanInf):
    """Repeat the ``TestNanInf`` tests with the ``PADDLE_INF_NAN_SKIP_*`` variables set."""

    def setUp(self):
        """Extend the base environment with the skip-related variables."""
        super().setUp()
        # Environment values must be plain ``str``; on Windows anything else
        # fails with "TypeError: environment can only contain strings".
        self.env["PADDLE_INF_NAN_SKIP_OP"] = "mul"
        self.env["PADDLE_INF_NAN_SKIP_ROLE"] = "loss"
        self.env["PADDLE_INF_NAN_SKIP_VAR"] = "elementwise_add:fc_0.tmp_1"


class TestCheckSkipEnv(TestNanInf):
    """Repeat the ``TestNanInf`` tests with the check/skip op-list variables set."""

    def setUp(self):
        """Extend the base environment with the op-list variables."""
        super().setUp()
        # Environment values must be plain ``str``; on Windows anything else
        # fails with "TypeError: environment can only contain strings".
        self.env["Paddle_check_nan_inf_op_list"] = "mean"
        self.env["Paddle_skip_nan_inf_op_list"] = "elementwise_add"


class TestNanInfCheckResult(unittest.TestCase):
    """Check the NaN/Inf counts and diagnostics reported by Paddle."""

    def setUp(self):
        """Prepare the interpreter command string and a copy of the environment."""
        self._python_interp = sys.executable
        if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
            self._python_interp += " -m coverage run --branch -p"

        self.env = os.environ.copy()

    def generate_inputs(self, shape, dtype="float32"):
        """Create random test arrays.

        Args:
            shape: Shape of the generated arrays.
            dtype: NumPy dtype name the arrays are cast to.

        Returns:
            A tuple ``(x, y)``. ``x`` holds values in [-10, 10) multiplied by
            a random 0/1 mask, so it contains negatives and exact zeros.
            ``y`` holds random 0/1 values.
        """
        data = np.random.random(size=shape).astype(dtype)
        # [-10, 10), with a random 0/1 mask zeroing some of the entries.
        mask = np.random.randint(low=0, high=2, size=shape).astype(dtype)
        x = (data * 20 - 10) * mask
        y = np.random.randint(low=0, high=2, size=shape).astype(dtype)
        return x, y

    def get_reference_num_nan_inf(self, x):
        """Count the NaN and Inf entries of ``np.log(x)`` as the reference.

        Args:
            x: NumPy array fed to ``np.log``.

        Returns:
            A tuple ``(num_nan, num_inf)``.
        """
        out = np.log(x)
        num_nan = np.sum(np.isnan(out))
        num_inf = np.sum(np.isinf(out))
        print(f"[reference] num_nan={num_nan}, num_inf={num_inf}")
        return num_nan, num_inf

    def get_num_nan_inf(self, x_np, use_cuda=True, add_assert=False):
        """Run ``paddle.log`` and parse the NaN/Inf counts from the raised error.

        Args:
            x_np: NumPy array converted to a tensor and passed to ``paddle.log``.
            use_cuda: Run on ``gpu:0`` when true, otherwise on the CPU.
            add_assert: When true, raise ``AssertionError`` if ``paddle.log``
                did not raise. That error is caught by the same handler and
                carries no counts, so both results stay 0.

        Returns:
            A tuple ``(num_nan, num_inf)`` taken from the exception message;
            both are 0 when the message carries no counts.
        """
        num_nan = 0
        num_inf = 0
        try:
            paddle.device.set_device("gpu:0" if use_cuda else "cpu")
            x = paddle.to_tensor(x_np)
            # Only the side effect (the NaN/Inf check) matters here.
            _ = paddle.log(x)
            sys.stdout.flush()
            if add_assert:
                raise AssertionError()
        except Exception as e:
            # Cannot catch the log in CUDA kernel.
            tokens = (
                str(e)
                .replace("(", " ")
                .replace(")", " ")
                .replace(",", " ")
                .split(" ")
            )
            for token in tokens:
                if "num_nan" in token:
                    num_nan = int(token.split("=")[1])
                elif "num_inf" in token:
                    num_inf = int(token.split("=")[1])
            print(f"[paddle] num_nan={num_nan}, num_inf={num_inf}")
        return num_nan, num_inf

    def test_num_nan_inf(self):
        """Compare Paddle's reported NaN/Inf counts with the NumPy reference."""

        def _check_num_nan_inf(use_cuda):
            shape = [32, 32]
            x_np, _ = self.generate_inputs(shape)
            num_nan_np, num_inf_np = self.get_reference_num_nan_inf(x_np)
            add_assert = (num_nan_np + num_inf_np) > 0
            num_nan, num_inf = self.get_num_nan_inf(x_np, use_cuda, add_assert)
            # The counts are only compared on CPU; the CUDA run merely has to
            # complete.
            if not use_cuda:
                self.assertEqual(num_nan, num_nan_np)
                self.assertEqual(num_inf, num_inf_np)

        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 0}
        )
        _check_num_nan_inf(use_cuda=False)
        if paddle.fluid.core.is_compiled_with_cuda():
            _check_num_nan_inf(use_cuda=True)

    def test_check_stack(self):
        """Check that the failing script's output includes the offending source line."""
        self._python_interp += " check_nan_inf_backward_stack.py"
        output = _run_and_capture(self._python_interp, self.env)

        # The captured output is bytes, so the needle must be bytes as well.
        self.assertIn(b' z = paddle.pow(x, y)', output)

    def check_nan_inf_level(self, use_cuda, dtype):
        """Evaluate ``log(x * 1e6) / y`` on the chosen device.

        Nothing is asserted here; the method only has to run to completion
        under the flags set by the calling test.

        Args:
            use_cuda: Run on ``gpu:0`` when true, otherwise on the CPU.
            dtype: NumPy dtype name of the generated inputs.
        """
        shape = [8, 8]
        x_np, y_np = self.generate_inputs(shape, dtype)

        paddle.device.set_device("gpu:0" if use_cuda else "cpu")
        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        # Only the side effect (the NaN/Inf check) matters here.
        _ = paddle.log(x * 1e6) / y

    def test_check_nan_inf_level_float32(self):
        """Run the float32 check with ``FLAGS_check_nan_inf_level`` set to 2."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 2}
        )
        self.check_nan_inf_level(use_cuda=False, dtype="float32")
        if paddle.fluid.core.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float32")

    def test_check_nan_inf_level_float16(self):
        """Run the float16 check (CUDA builds only) with level 3."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
        )
        if paddle.fluid.core.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float16")

    def test_check_numerics(self):
        """Call ``check_numerics`` directly on float16 tensors with level 3."""
        paddle.set_flags(
            {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3}
        )
        if paddle.fluid.core.is_compiled_with_cuda():
            self.check_nan_inf_level(use_cuda=True, dtype="float16")

        shape = [8, 8]
        x_np, y_np = self.generate_inputs(shape, "float16")
        x = paddle.to_tensor(x_np)
        y = paddle.to_tensor(y_np)
        paddle.fluid.core.check_numerics("check_numerics", x)
        paddle.fluid.core.check_numerics("check_numerics", y)


if __name__ == '__main__':
    unittest.main()