Unverified commit 90cb9a0d authored by 201716010711, committed by GitHub

[AMP OP&Test] Norm bf16 (#51083)

Parent 4c9e34dc
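This commit registers bfloat16 for the GPU norm and norm_grad PHI kernels, has the CUDA kernel take epsilon as float rather than the element type T, and adds a TestNormBF16Op unit test. As a rough illustration (an editor's addition, not part of the commit), the Python snippet below estimates the error introduced by a float32 -> bfloat16 -> float32 round trip, which is why the new test uses the loose atol=1e-1 and max_relative_error=1e-2 seen further down:

import numpy as np

def bf16_round_trip(x):
    # bfloat16 is, roughly, float32 with the low 16 mantissa bits dropped;
    # truncating those bits and zero-filling them approximates a bf16 round trip.
    bits = np.asarray(x, dtype=np.float32).view(np.uint32)
    return ((bits >> 16) << 16).view(np.float32)

x = np.random.random([2, 3, 4, 5]).astype(np.float32)
print(np.max(np.abs(bf16_round_trip(x) - x)))  # typically on the order of 1e-3 for values in [0, 1)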
@@ -116,4 +116,5 @@ PD_REGISTER_KERNEL(norm_grad,
phi::NormGradKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::bfloat16) {}
@@ -43,7 +43,7 @@ __global__ void Normalize(const T* x,
const int pre,
const int axis_n, // dim in axis
const int post,
const T eps,
const float eps,
T* y,
T* out_norm) {
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
@@ -86,7 +86,6 @@ void NormKernel(const Context& ctx,
auto xdim = in_x->dims();
if (axis < 0) axis = xdim.size() + axis;
T eps = static_cast<T>(epsilon);
DenseTensor* out_norm;
DenseTensor out_norm_tmp;
@@ -117,8 +116,8 @@ void NormKernel(const Context& ctx,
int max_threads = ctx.GetMaxPhysicalThreadCount();
const int max_blocks = std::max(max_threads / block, 1);
int grid = std::min(max_blocks, pre * post);
Normalize<T, block>
<<<grid, block, 0, ctx.stream()>>>(x_ptr, pre, n, post, eps, y, norm_ptr);
Normalize<T, block><<<grid, block, 0, ctx.stream()>>>(
x_ptr, pre, n, post, epsilon, y, norm_ptr);
}
} // namespace phi
@@ -129,4 +128,5 @@ PD_REGISTER_KERNEL(norm,
phi::NormKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::bfloat16) {}
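A note on the eps change in the kernel hunks above (an editor's reading of the diff, not the author's stated rationale): epsilon is now passed to Normalize as float instead of being cast to the element type T first. For low-precision element types that cast can change the value; the hedged sketch below shows that float16 cannot represent the default epsilon of 1e-8 at all, while a float keeps it:

import numpy as np

eps = 1e-8  # the default epsilon used by the norm tests below
print(np.float16(eps))  # 0.0 -- underflows below float16's smallest subnormal (~6e-8)
print(np.float32(eps))  # 1e-08 -- preserved when eps stays in float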
@@ -16,9 +16,11 @@ import unittest
import numpy as np
from eager_op_test import OpTest, skip_check_grad_ci
from op_test import convert_float_to_uint16
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
def l2_norm(x, axis, epsilon):
@@ -157,6 +159,37 @@ class TestNormTestOp(OpTest):
self.epsilon = 1e-8
@unittest.skipIf(
not core.is_compiled_with_cuda(),
"core is not compiled with CUDA and not support the bfloat16",
)
class TestNormBF16Op(OpTest):
def setUp(self):
self.op_type = "norm"
self.python_api = norm_wrapper
self.init_test_case()
self.dtype = "float32"
x = np.random.random(self.shape).astype(self.dtype)
y, norm = l2_norm(x, self.axis, self.epsilon)
self.inputs = {'X': convert_float_to_uint16(x)}
self.attrs = {'epsilon': self.epsilon, 'axis': self.axis}
self.outputs = {'Out': convert_float_to_uint16(y), 'Norm': norm}
self.python_out_sig = ['Out']
def test_check_output(self):
self.check_output_with_place(core.CUDAPlace(0), atol=1e-1)
def test_check_grad(self):
self.check_grad_with_place(
core.CUDAPlace(0), ['X'], 'Out', max_relative_error=1e-2
)
def init_test_case(self):
self.shape = [2, 3, 4, 5]
self.axis = 1
self.epsilon = 1e-8
class API_NormTest(unittest.TestCase):
def test_errors(self):
with fluid.program_guard(fluid.Program()):
......
@@ -148,12 +148,10 @@ class TestScaleFp16Op(TestScaleOp):
self.dtype = np.float16
def test_check_output(self):
place = core.CUDAPlace(0)
self.check_output_with_place(place, check_eager=True)
self.check_output(check_eager=True)
def test_check_grad(self):
place = core.CUDAPlace(0)
self.check_grad_with_place(place, ["X"], "Out", check_eager=True)
self.check_grad(["X"], "Out", check_eager=True)
class TestScaleBF16Op(OpTest):
......