Unverified commit 6197fbf6, authored by E enzodechine, committed by GitHub

Re-implemented check_finite_and_unscale_op with newly added xdnn api (#42960)

* Re-implemented check_finite_and_unscale_op with newly added xdnn api

* test=kunlun

Parent b07f469b
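For context: check_finite_and_unscale takes a list of tensors plus a scale, reports whether any element is NaN/Inf, and multiplies every tensor by 1/scale (zeroing the inverse scale when a non-finite value is found, since those outputs are discarded). A minimal NumPy sketch of these semantics, as an illustrative reference only, not Paddle code:

```python
import numpy as np

def check_finite_and_unscale_ref(xs, scale):
    # found_inf mirrors what the new fused xpu::count_nan_or_inf call
    # computes on-device: whether any element is non-finite.
    found_inf = any(np.count_nonzero(~np.isfinite(x)) > 0 for x in xs)
    inverse_scale = 0.0 if found_inf else 1.0 / float(scale)
    outs = [x * inverse_scale for x in xs]
    return outs, found_inf
```

The diff below replaces the old three-kernel chain (isfinite, logical_not, any) with a single count_nan_or_inf call and switches error handling to the PADDLE_ENFORCE_XDNN_SUCCESS macro.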
@@ -15,9 +15,12 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/amp/check_finite_and_unscale_op.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T>
class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
using MPDType = typename details::MPTypeTrait<T>::Type;
@@ -38,6 +41,8 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
// copy to cpu
bool cpu_found_inf_data = false;
// number of inf and nans
int nums_inf_nans = 0;
MPDType cpu_scale_data;
if (platform::is_xpu_place(scale->place())) {
memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_scale_data),
@@ -52,48 +57,21 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
const auto* x = xs[i];
auto* out = outs[i];
out->mutable_data<T>(dev_ctx.GetPlace());
framework::Tensor is_finite =
ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
dev_ctx);
framework::Tensor is_nan =
ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
dev_ctx);
framework::Tensor is_finite_and_nan =
ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
dev_ctx);
if (cpu_found_inf_data == false) {
int r = xpu::isfinite(dev_ctx.x_context(),
reinterpret_cast<const XPUTyp*>(x->data<T>()),
is_finite.data<bool>(), x->numel());
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(isfinite) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
r = xpu::logical_not(
dev_ctx.x_context(),
reinterpret_cast<const bool*>(is_finite.data<bool>()),
is_finite.data<bool>(), x->numel());
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(logical_not) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
r = xpu::any(dev_ctx.x_context(), is_finite.data<bool>(),
found_inf_data, x->numel());
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(any) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
memory::Copy(platform::CPUPlace(), &cpu_found_inf_data,
dev_ctx.GetPlace(), found_inf_data, sizeof(bool));
framework::Tensor inf_nan_count =
ctx.AllocateTmpTensor<int, platform::XPUDeviceContext>(
found_inf->dims(), dev_ctx);
if (nums_inf_nans == 0) {
int r = xpu::count_nan_or_inf(
dev_ctx.x_context(), reinterpret_cast<const XPUTyp*>(x->data<T>()),
inf_nan_count.data<int>(), x->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "count_nan_or_inf");
memory::Copy(platform::CPUPlace(), &nums_inf_nans, dev_ctx.GetPlace(),
inf_nan_count.data<int>(), sizeof(int));
}
if (cpu_found_inf_data) {
if (nums_inf_nans > 0) {
cpu_found_inf_data = true;
inverse_scale = 0.0;
}
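The next hunk is the float16 branch: the unscale multiply runs in MPDType (float32) precision, so fp16 inputs are cast up, scaled, and cast back down. A NumPy sketch of that detour, illustrative only:

```python
import numpy as np

def unscale_fp16(x_fp16, inverse_scale):
    # Mirrors the fp16 branch: xpu::cast_v2 up, xpu::scale, xpu::cast_v2 back.
    x_fp32 = x_fp16.astype(np.float32)   # fp16 -> float32
    out_fp32 = x_fp32 * inverse_scale    # multiply by 1/loss_scale (or 0)
    return out_fp32.astype(np.float16)   # float32 -> fp16
```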
@@ -109,45 +87,25 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
int r = xpu::cast_v2(dev_ctx.x_context(),
reinterpret_cast<const float16*>(x->data<T>()),
float_x.data<MPDType>(), x->numel());
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(cast_v2) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
r = xpu::scale(dev_ctx.x_context(), float_x.data<MPDType>(),
float_out.data<MPDType>(), x->numel(), false,
inverse_scale, 0.0);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(scale) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
r = xpu::cast_v2(dev_ctx.x_context(), float_out.data<MPDType>(),
reinterpret_cast<float16*>(out->data<T>()),
out->numel());
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(cast_v2) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
} else {
int r = xpu::scale(dev_ctx.x_context(),
reinterpret_cast<const XPUTyp*>(x->data<T>()),
reinterpret_cast<XPUTyp*>(out->data<T>()),
x->numel(), false, inverse_scale, 0.0);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU API(scale) return wrong "
"value[%d %s]",
r, XPUAPIErrorMsg[r]));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
}
}
if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
memory::Copy(dev_ctx.GetPlace(), found_inf_data, platform::CPUPlace(),
&cpu_found_inf_data, sizeof(bool));
}
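To summarize the kernel rewrite above: the old implementation launched three xdnn kernels per input tensor (isfinite, logical_not, any) and copied a bool back to the host; the new one launches a single count_nan_or_inf kernel, copies back an int, and skips the probe entirely once a non-finite value has been seen. A host-side sketch of the new control flow, as pseudocode rather than the actual kernel, with the two callables standing in for the xdnn device calls:

```python
def kernel_flow(xs, scale, count_nan_or_inf, scale_kernel):
    nums_inf_nans = 0
    cpu_found_inf_data = False
    inverse_scale = 1.0 / scale
    outs = []
    for x in xs:
        # Probe for NaN/Inf only until the first hit, matching
        # `if (nums_inf_nans == 0)` in the rewritten kernel.
        if nums_inf_nans == 0:
            nums_inf_nans = count_nan_or_inf(x)
        if nums_inf_nans > 0:
            cpu_found_inf_data = True
            inverse_scale = 0.0
        outs.append(scale_kernel(x, inverse_scale))
    return outs, cpu_found_inf_data
```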
@@ -19,84 +19,126 @@ import paddle
import unittest
import numpy as np
from op_test_xpu import XPUOpTest
from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid
from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
paddle.enable_static()
class TestCheckFiniteAndUnscaleOp(XPUOpTest):
def setUp(self):
self.op_type = "check_finite_and_unscale"
self.init_dtype()
x = np.random.random((1024, 1024)).astype(self.dtype)
scale = np.random.random((1)).astype(self.dtype)
# self.attrs = {'stop_gradient': True}
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([0]),
'Out': [('out0', x / scale)],
}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
# class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
# def setUp(self):
# self.op_type = "check_finite_and_unscale"
# self.init_dtype()
# x = np.random.random((1024, 1024)).astype(self.dtype)
# x[128][128] = np.nan
# print("x shape = ", x.shape)
# print(x)
# scale = np.random.random((1)).astype(self.dtype)
# self.inputs = {'X': [('x0', x)], 'Scale': scale}
# self.outputs = {
# 'FoundInfinite': np.array([1]),
# 'Out': [('out0', x)],
# }
# def init_dtype(self):
# self.dtype = np.float32
# def test_check_output(self):
# # When input contains nan, do not check the output,
# # since the output may be nondeterministic and will be discarded.
# if paddle.is_compiled_with_xpu():
# place = paddle.XPUPlace(0)
# self.check_output_with_place(place, no_check_set=['Out'])
# class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
# def setUp(self):
# self.op_type = "check_finite_and_unscale"
# self.init_dtype()
# x = np.random.random((1024, 1024)).astype(self.dtype)
# x[128][128] = np.inf
# scale = np.random.random((1)).astype(self.dtype)
# self.inputs = {'X': [('x0', x)], 'Scale': scale}
# self.outputs = {
# 'FoundInfinite': np.array([1]),
# 'Out': [('out0', x)],
# }
# def init_dtype(self):
# self.dtype = np.float32
# def test_check_output(self):
# # When input contains inf, do not check the output,
# # since the output may be nondeterministic and will be discarded.
# if paddle.is_compiled_with_xpu():
# place = paddle.XPUPlace(0)
# self.check_output_with_place(place, no_check_set=['Out'])
class XPUTestCheckFiniteAndUnscaleOp(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'check_finite_and_unscale'
self.use_dynamic_create_class = False
class TestCheckFiniteAndUnscaleOpNormal(XPUOpTest):
def setUp(self):
self.op_type = "check_finite_and_unscale"
self.init_dtype()
x = np.random.random((8, 8)).astype(self.dtype)
scale = np.random.random((1)).astype(np.float32)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([0]),
'Out': [('out0', x / scale)],
}
def init_dtype(self):
self.dtype = self.in_type
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
def setUp(self):
self.op_type = "check_finite_and_unscale"
self.init_dtype()
x = np.random.random((256, 256)).astype(self.dtype)
idx1 = np.random.randint(255)
idx2 = np.random.randint(255)
x[idx1][idx2] = np.nan
x[idx2][idx1] = np.nan
scale = np.random.random((1)).astype(np.float32)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([1]),
'Out': [('out0', x)],
}
def init_dtype(self):
self.dtype = self.in_type
def test_check_output(self):
# When input contains nan, do not check the output,
# since the output may be nondeterministic and will be discarded.
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, no_check_set=['Out'])
class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
def setUp(self):
self.op_type = "check_finite_and_unscale"
self.init_dtype()
x = np.random.random((256, 256)).astype(self.dtype)
idx1 = np.random.randint(255)
idx2 = np.random.randint(255)
            x[idx1][idx2] = np.inf
            x[idx2][idx1] = np.inf
scale = np.random.random((1)).astype(np.float32)
myscale = np.array([0.05]).astype(self.dtype)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([1]),
'Out': [('out0', x)],
}
def init_dtype(self):
self.dtype = self.in_type
def test_check_output(self):
# When input contains inf, do not check the output,
# since the output may be nondeterministic and will be discarded.
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, no_check_set=['Out'])
class TestCheckFiniteAndUnscaleOpWithInfAndNan(XPUOpTest):
def setUp(self):
self.op_type = "check_finite_and_unscale"
self.init_dtype()
x = np.random.random((256, 256)).astype(self.dtype)
idx1 = np.random.randint(255)
idx2 = np.random.randint(255)
x[idx1][idx2] = np.inf
x[idx2][idx1] = np.nan
scale = np.random.random((1)).astype(np.float32)
myscale = np.array([0.05]).astype(self.dtype)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([1]),
'Out': [('out0', x)],
}
def init_dtype(self):
self.dtype = self.in_type
def test_check_output(self):
# When input contains inf, do not check the output,
# since the output may be nondeterministic and will be discarded.
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, no_check_set=['Out'])
support_types = get_xpu_op_support_types('check_finite_and_unscale')
for stype in support_types:
create_test_class(globals(), XPUTestCheckFiniteAndUnscaleOp, stype)
if __name__ == '__main__':
unittest.main()
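The create_test_class call at the bottom stamps out one concrete unittest class per dtype reported by get_xpu_op_support_types, pinning in_type for each. Conceptually this is plain dynamic class creation; a rough, hypothetical equivalent (not Paddle's actual helper, and make_dtype_case is an invented name):

```python
import numpy as np

def make_dtype_case(scope, wrapper_cls, dtype):
    # For each inner Test* class on the wrapper, derive a concrete
    # subclass with `in_type` pinned to one supported dtype and
    # register it in the given namespace so unittest discovers it.
    for name, cls in list(vars(wrapper_cls).items()):
        if isinstance(cls, type) and name.startswith("Test"):
            new_name = "%s_%s" % (name, np.dtype(dtype).name)
            scope[new_name] = type(new_name, (cls,), {"in_type": dtype})
```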