diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
index 1d3e5e5162ca9d3b23d4164b6d994a3ae141d5cb..8bf1398f607c80421a1e0e4fc70b1596d29f9d2e 100644
--- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
+++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
@@ -15,9 +15,12 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/amp/check_finite_and_unscale_op.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/float16.h"
+
 namespace paddle {
 namespace operators {
+
 template <typename T>
 class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
   using MPDType = typename details::MPTypeTrait<T>::Type;
@@ -38,6 +41,8 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
 
     // cpy to cpu
     bool cpu_found_inf_data = false;
+    // number of inf and nans
+    int nums_inf_nans = 0;
     MPDType cpu_scale_data;
     if (platform::is_xpu_place(scale->place())) {
       memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_scale_data),
@@ -52,48 +57,21 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
       const auto* x = xs[i];
       auto* out = outs[i];
       out->mutable_data<T>(dev_ctx.GetPlace());
 
-      framework::Tensor is_finite =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_finite_and_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      if (cpu_found_inf_data == false) {
-        int r = xpu::isfinite(dev_ctx.x_context(),
-                              reinterpret_cast<const XPUTyp*>(x->data<T>()),
-                              is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(isfinite) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::logical_not(
-            dev_ctx.x_context(),
-            reinterpret_cast<const bool*>(is_finite.data<bool>()),
-            is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(logical_not) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::any(dev_ctx.x_context(), is_finite.data<bool>(),
-                     found_inf_data, x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(any) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        if (dev_ctx.x_context()->xpu_stream) {
-          dev_ctx.Wait();
-        }
-        memory::Copy(platform::CPUPlace(), &cpu_found_inf_data,
-                     dev_ctx.GetPlace(), found_inf_data, sizeof(bool));
+      framework::Tensor inf_nan_count =
+          ctx.AllocateTmpTensor<int, platform::XPUDeviceContext>(
+              found_inf->dims(), dev_ctx);
+
+      if (nums_inf_nans == 0) {
+        int r = xpu::count_nan_or_inf(
+            dev_ctx.x_context(), reinterpret_cast<const XPUTyp*>(x->data<T>()),
+            inf_nan_count.data<int>(), x->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "count_nan_or_inf");
+        memory::Copy(platform::CPUPlace(), &nums_inf_nans, dev_ctx.GetPlace(),
+                     inf_nan_count.data<int>(), sizeof(int));
       }
-      if (cpu_found_inf_data) {
+      if (nums_inf_nans > 0) {
+        cpu_found_inf_data = true;
         inverse_scale = 0.0;
       }
 
@@ -109,45 +87,25 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
         int r = xpu::cast_v2(dev_ctx.x_context(),
                              reinterpret_cast<const float16*>(x->data<T>()),
                              float_x.data<MPDType>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
 
         r = xpu::scale(dev_ctx.x_context(), float_x.data<MPDType>(),
                        float_out.data<MPDType>(), x->numel(), false,
                        inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
 
         r = xpu::cast_v2(dev_ctx.x_context(), float_out.data<MPDType>(),
                          reinterpret_cast<float16*>(out->data<T>()),
                          out->numel());
-
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
       } else {
         int r = xpu::scale(dev_ctx.x_context(),
                            reinterpret_cast<const XPUTyp*>(x->data<T>()),
                            reinterpret_cast<XPUTyp*>(out->data<T>()),
                            x->numel(), false, inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
       }
     }
-    if (dev_ctx.x_context()->xpu_stream) {
-      dev_ctx.Wait();
-    }
     memory::Copy(dev_ctx.GetPlace(), found_inf_data, platform::CPUPlace(),
                  &cpu_found_inf_data, sizeof(bool));
   }
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
index 3ef4701cdf3d081986aaa7648e8b0c10aafca7e9..e6bc61b895abbe8506352195979db42d9448c4fc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
@@ -19,84 +19,126 @@ import paddle
 import unittest
 import numpy as np
 from op_test_xpu import XPUOpTest
-from op_test import OpTest, skip_check_grad_ci
-import paddle.fluid as fluid
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
 
 paddle.enable_static()
 
 
-class TestCheckFiniteAndUnscaleOp(XPUOpTest):
-
-    def setUp(self):
-        self.op_type = "check_finite_and_unscale"
-        self.init_dtype()
-        x = np.random.random((1024, 1024)).astype(self.dtype)
-        scale = np.random.random((1)).astype(self.dtype)
-        # self.attrs = {'stop_gradient': True}
-        self.inputs = {'X': [('x0', x)], 'Scale': scale}
-        self.outputs = {
-            'FoundInfinite': np.array([0]),
-            'Out': [('out0', x / scale)],
-        }
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        if paddle.is_compiled_with_xpu():
-            place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
-
-
-# class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.nan
-#         print("x shape = ", x.shape)
-#         print(x)
-#         scale = np.random.random((1)).astype(self.dtype)
-
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-
-#     def init_dtype(self):
-#         self.dtype = np.float32
-
-#     def test_check_output(self):
-#         # When input contains nan, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
-# class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.inf
-#         scale = np.random.random((1)).astype(self.dtype)
-
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-
-#     def init_dtype(self):
-#         self.dtype = np.float32
-
-#     def test_check_output(self):
-#         # When input contains inf, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
+class XPUTestCheckFiniteAndUnscaleOp(XPUOpTestWrapper):
+
+    def __init__(self):
+        self.op_name = 'check_finite_and_unscale'
+        self.use_dynamic_create_class = False
+
+    class TestCheckFiniteAndUnscaleOpNormal(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((8, 8)).astype(self.dtype)
+            scale = np.random.random((1)).astype(np.float32)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([0]),
+                'Out': [('out0', x / scale)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place)
+
+    class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.nan
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains nan, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.inf
+            x[idx2][idx1] = np.inf
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInfAndNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.inf
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+
+support_types = get_xpu_op_support_types('check_finite_and_unscale')
+for stype in support_types:
+    create_test_class(globals(), XPUTestCheckFiniteAndUnscaleOp, stype)
 
 
 if __name__ == '__main__':
     unittest.main()
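
Note: the behavior exercised by the new tests can be summarized in plain NumPy. The sketch below is only an illustration of the contract the tests assert, not part of the patch; the helper name reference_check_finite_and_unscale is invented for this note. With finite inputs FoundInfinite is 0 and every output equals x / scale; once any input holds a NaN or Inf, FoundInfinite is 1 and the tests skip the outputs via no_check_set=['Out'].

    import numpy as np

    def reference_check_finite_and_unscale(xs, scale):
        # FoundInfinite flags any NaN/Inf across all input tensors.
        found_inf = any(not np.isfinite(x).all() for x in xs)
        if found_inf:
            # Outputs are nondeterministic in this case; the tests above skip
            # them with no_check_set=['Out'].
            return None, np.array([1])
        # Normal path: every output is the input divided by the scale.
        return [x / scale for x in xs], np.array([0])

    # Example mirroring TestCheckFiniteAndUnscaleOpNormal's inputs.
    outs, found = reference_check_finite_and_unscale(
        [np.random.random((8, 8)).astype(np.float32)],
        np.random.random((1)).astype(np.float32))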