未验证 提交 6197fbf6 编写于 作者: E enzodechine 提交者: GitHub

Re-implemented check_finite_and_unscale_op with newly added xdnn api (#42960)

* Re-implemented check_finite_and_unscale_op with newly added xdnn api
* test=kunlun
Parent commit: b07f469b
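For context before the kernel diff: check_finite_and_unscale is the AMP helper that scans a list of scaled gradients for Inf/NaN and, when all values are finite, divides them by the loss scale. Below is a minimal NumPy sketch of that contract as exercised by the tests later in this commit; it is an illustration of the op's semantics, not the Paddle implementation.

```python
import numpy as np

def check_finite_and_unscale_ref(xs, scale):
    """Reference semantics (NumPy sketch, not the Paddle kernel):
    FoundInfinite is 1 if any input holds Inf/NaN; otherwise every
    input is unscaled by 1/scale."""
    found_infinite = any(not np.isfinite(x).all() for x in xs)
    if found_infinite:
        # Outputs are not meaningful in this case and are discarded by
        # the caller (the tests below skip checking 'Out').
        return [x.copy() for x in xs], np.array([1])
    inverse_scale = 1.0 / scale
    return [x * inverse_scale for x in xs], np.array([0])

# Example usage with illustrative data:
# outs, found = check_finite_and_unscale_ref(
#     [np.random.random((8, 8)).astype(np.float32)],
#     np.array([0.5], dtype=np.float32))
```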
@@ -15,9 +15,12 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/amp/check_finite_and_unscale_op.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/float16.h"
 namespace paddle {
 namespace operators {
 template <typename T>
 class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
   using MPDType = typename details::MPTypeTrait<T>::Type;
@@ -38,6 +41,8 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
     // cpy to cpu
     bool cpu_found_inf_data = false;
+    // number of inf and nans
+    int nums_inf_nans = 0;
     MPDType cpu_scale_data;
     if (platform::is_xpu_place(scale->place())) {
       memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_scale_data),
@@ -52,48 +57,21 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
       const auto* x = xs[i];
       auto* out = outs[i];
       out->mutable_data<T>(dev_ctx.GetPlace());
-      framework::Tensor is_finite =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_finite_and_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      if (cpu_found_inf_data == false) {
-        int r = xpu::isfinite(dev_ctx.x_context(),
-                              reinterpret_cast<const XPUTyp*>(x->data<T>()),
-                              is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(isfinite) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::logical_not(
-            dev_ctx.x_context(),
-            reinterpret_cast<const bool*>(is_finite.data<bool>()),
-            is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(logical_not) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::any(dev_ctx.x_context(), is_finite.data<bool>(),
-                     found_inf_data, x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(any) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        if (dev_ctx.x_context()->xpu_stream) {
-          dev_ctx.Wait();
-        }
-        memory::Copy(platform::CPUPlace(), &cpu_found_inf_data,
-                     dev_ctx.GetPlace(), found_inf_data, sizeof(bool));
+      framework::Tensor inf_nan_count =
+          ctx.AllocateTmpTensor<int, platform::XPUDeviceContext>(
+              found_inf->dims(), dev_ctx);
+
+      if (nums_inf_nans == 0) {
+        int r = xpu::count_nan_or_inf(
+            dev_ctx.x_context(), reinterpret_cast<const XPUTyp*>(x->data<T>()),
+            inf_nan_count.data<int>(), x->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "count_nan_or_inf");
+        memory::Copy(platform::CPUPlace(), &nums_inf_nans, dev_ctx.GetPlace(),
+                     inf_nan_count.data<int>(), sizeof(int));
       }
-      if (cpu_found_inf_data) {
+
+      if (nums_inf_nans > 0) {
+        cpu_found_inf_data = true;
         inverse_scale = 0.0;
       }
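The hunk above replaces the old isfinite → logical_not → any chain with a single xpu::count_nan_or_inf call per tensor and skips further device work once a non-finite value has been found. A hedged Python sketch of that control flow (the function name is illustrative, not a Paddle API):

```python
import numpy as np

def detect_non_finite(xs):
    """Mirror of the loop's early-exit logic: count Inf/NaN per tensor,
    but stop issuing checks as soon as any tensor reports a non-zero count."""
    nums_inf_nans = 0
    for x in xs:
        if nums_inf_nans == 0:
            # Stands in for the device-side xpu::count_nan_or_inf call.
            nums_inf_nans = int(np.count_nonzero(~np.isfinite(x)))
    return nums_inf_nans > 0
```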
@@ -109,45 +87,25 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
         int r = xpu::cast_v2(dev_ctx.x_context(),
                              reinterpret_cast<const float16*>(x->data<T>()),
                              float_x.data<MPDType>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
         r = xpu::scale(dev_ctx.x_context(), float_x.data<MPDType>(),
                        float_out.data<MPDType>(), x->numel(), false,
                        inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
         r = xpu::cast_v2(dev_ctx.x_context(), float_out.data<MPDType>(),
                          reinterpret_cast<float16*>(out->data<T>()),
                          out->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
       } else {
         int r = xpu::scale(dev_ctx.x_context(),
                            reinterpret_cast<const XPUTyp*>(x->data<T>()),
                            reinterpret_cast<XPUTyp*>(out->data<T>()),
                            x->numel(), false, inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
       }
     }
-    if (dev_ctx.x_context()->xpu_stream) {
-      dev_ctx.Wait();
-    }
     memory::Copy(dev_ctx.GetPlace(), found_inf_data, platform::CPUPlace(),
                  &cpu_found_inf_data, sizeof(bool));
   }
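In the float16 branch kept above, the unscale runs on a float32 intermediate: cast x up, multiply by 1/loss_scale, cast back down. A rough NumPy equivalent for illustration only (the kernel itself uses xpu::cast_v2 and xpu::scale):

```python
import numpy as np

def unscale_fp16(x_fp16, inverse_scale):
    """Unscale a float16 tensor via a float32 intermediate, mirroring
    the cast_v2 -> scale -> cast_v2 path of the kernel's fp16 branch."""
    x_fp32 = x_fp16.astype(np.float32)   # cast_v2 (fp16 -> fp32)
    y_fp32 = x_fp32 * inverse_scale      # scale with bias 0.0
    return y_fp32.astype(np.float16)     # cast_v2 (fp32 -> fp16)

# e.g. unscale_fp16(np.array([1.0, 2.0], dtype=np.float16), 0.5)
# -> array([0.5, 1.0], dtype=float16)
```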
......
@@ -19,84 +19,126 @@ import paddle
 import unittest
 import numpy as np
 from op_test_xpu import XPUOpTest
-from op_test import OpTest, skip_check_grad_ci
-import paddle.fluid as fluid
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
 
 paddle.enable_static()
 
-class TestCheckFiniteAndUnscaleOp(XPUOpTest):
-    def setUp(self):
-        self.op_type = "check_finite_and_unscale"
-        self.init_dtype()
-        x = np.random.random((1024, 1024)).astype(self.dtype)
-        scale = np.random.random((1)).astype(self.dtype)
-        # self.attrs = {'stop_gradient': True}
-        self.inputs = {'X': [('x0', x)], 'Scale': scale}
-        self.outputs = {
-            'FoundInfinite': np.array([0]),
-            'Out': [('out0', x / scale)],
-        }
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        if paddle.is_compiled_with_xpu():
-            place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
-
-# class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.nan
-#         print("x shape = ", x.shape)
-#         print(x)
-#         scale = np.random.random((1)).astype(self.dtype)
-#
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-#
-#     def init_dtype(self):
-#         self.dtype = np.float32
-#
-#     def test_check_output(self):
-#         # When input contains nan, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
-
-# class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.inf
-#         scale = np.random.random((1)).astype(self.dtype)
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-#
-#     def init_dtype(self):
-#         self.dtype = np.float32
-#
-#     def test_check_output(self):
-#         # When input contains inf, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
+
+class XPUTestCheckFiniteAndUnscaleOp(XPUOpTestWrapper):
+
+    def __init__(self):
+        self.op_name = 'check_finite_and_unscale'
+        self.use_dynamic_create_class = False
+
+    class TestCheckFiniteAndUnscaleOpNormal(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((8, 8)).astype(self.dtype)
+            scale = np.random.random((1)).astype(np.float32)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([0]),
+                'Out': [('out0', x / scale)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place)
+
+    class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.nan
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains nan, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.nan
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInfAndNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.inf
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+
+support_types = get_xpu_op_support_types('check_finite_and_unscale')
+for stype in support_types:
+    create_test_class(globals(), XPUTestCheckFiniteAndUnscaleOp, stype)
 
 if __name__ == '__main__':
     unittest.main()
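The rewritten tests build their inputs the same way for every supported dtype registered through create_test_class: a random matrix, optionally poisoned with NaN/Inf at random positions, with FoundInfinite derived from whether the input stays finite. A condensed sketch of that construction (the helper name is hypothetical and not part of the test file):

```python
import numpy as np

def make_case(dtype, inject=None, shape=(256, 256), seed=0):
    """Build (inputs, expected FoundInfinite) the way the tests above do.
    `inject` is None, np.nan, or np.inf."""
    rng = np.random.RandomState(seed)
    x = rng.random_sample(shape).astype(dtype)
    if inject is not None:
        idx1, idx2 = rng.randint(shape[0] - 1), rng.randint(shape[1] - 1)
        x[idx1][idx2] = inject
    scale = rng.random_sample((1,)).astype(np.float32)
    found_infinite = np.array([0 if np.isfinite(x).all() else 1])
    return {'X': [('x0', x)], 'Scale': scale}, found_infinite
```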