Unverified commit 1a13fa0f authored by furnace, committed by GitHub

[NPU] fix fp16 (PART II) (#40537)

[NPU] fix fp16 (PART II)
Parent ef4ef154
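The test-side changes below all follow one pattern: instead of skipping gradient checks when self.dtype == np.float16, each test now branches on the dtype and calls check_grad_with_place with a looser max_relative_error for fp16, while fp32 keeps the tight tolerance and CPU-computed numeric gradients. A minimal sketch of that pattern, assembled from the relu test in this diff (the class name is illustrative, not a class from this commit):

import numpy as np
import paddle
from op_test import OpTest  # Paddle's unit-test helper, as imported by the NPU tests below

class TestReluLikeFp16Pattern(OpTest):  # hypothetical example class
    def setUp(self):
        self.__class__.use_npu = True
        self.op_type = "relu"
        self.place = paddle.NPUPlace(0)
        self.dtype = np.float16
        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
        x[np.abs(x) < 0.005] = 0.02  # keep values away from the kink at 0
        self.inputs = {'X': x}
        self.outputs = {'Out': np.maximum(x, 0)}

    def test_check_grad(self):
        if self.dtype == np.float16:
            # fp16 on the NPU: run the gradient check with a relaxed tolerance
            self.check_grad_with_place(
                self.place, ['X'], 'Out', max_relative_error=0.006)
        else:
            # fp32: strict default tolerance
            self.check_grad_with_place(self.place, ['X'], 'Out')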
......@@ -356,7 +356,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
auto stream = ctx.template device_context<NPUDeviceContext>().stream();
if (filter_grad) {
- filter_grad->mutable_data<T>(ctx.GetPlace());
+ filter_grad->mutable_data<float>(ctx.GetPlace());
std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());
const auto& runner = NpuOpRunner(
......
......@@ -90,5 +90,5 @@ namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(pad, ops::PadNPUKernel<plat::float16>,
ops::PadNPUKernel<float>, ops::PadNPUKernel<int>);
- REGISTER_OP_NPU_KERNEL(pad_grad, ops::PadNPUKernel<plat::float16>,
+ REGISTER_OP_NPU_KERNEL(pad_grad, ops::PadGradNPUKernel<plat::float16>,
ops::PadGradNPUKernel<float>);
......@@ -22,4 +22,5 @@ if (WITH_ASCEND_CL)
set_tests_properties(test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200)
set_tests_properties(test_conv2d_op_npu PROPERTIES TIMEOUT 300)
set_tests_properties(test_matmulv2_op_npu PROPERTIES TIMEOUT 300)
+ set_tests_properties(test_elementwise_add_op_npu PROPERTIES TIMEOUT 200)
endif()
......@@ -132,36 +132,50 @@ class TestDepthwiseConvNPU(OpTest):
self.check_output_with_place(self.place, atol=1e-2)
def test_check_grad(self):
- if self.dtype == np.float16:
- return
if self.dilations[0] == 1 and self.dilations[1] == 1:
- self.check_grad_with_place(
- self.place, {'Input', 'Filter'},
- 'Output',
- max_relative_error=0.03,
- numeric_place=paddle.CPUPlace())
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place, {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=0.9)
+ else:
+ self.check_grad_with_place(
+ self.place, {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=0.03,
+ numeric_place=paddle.CPUPlace())
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place, ['Input'],
- 'Output',
- no_grad_set=set(['Filter']),
- max_relative_error=0.03,
- numeric_place=paddle.CPUPlace())
- def test_check_grad_no_input(self):
- if self.dtype == np.float16:
- return
- if self.dilations[0] == 1 and self.dilations[1] == 1:
self.check_grad_with_place(
- self.place, ['Filter'],
+ self.place, ['Input'],
'Output',
- no_grad_set=set(['Input']),
+ no_grad_set=set(['Filter']),
+ max_relative_error=0.9)
+ else:
+ self.check_grad_with_place(
+ self.place, ['Input'],
+ 'Output',
+ no_grad_set=set(['Filter']),
max_relative_error=0.03,
numeric_place=paddle.CPUPlace())
+ def test_check_grad_no_input(self):
+ if self.dilations[0] == 1 and self.dilations[1] == 1:
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place, ['Filter'],
+ 'Output',
+ no_grad_set=set(['Input']),
+ max_relative_error=0.9)
+ else:
+ self.check_grad_with_place(
+ self.place, ['Filter'],
+ 'Output',
+ no_grad_set=set(['Input']),
+ max_relative_error=0.03,
+ numeric_place=paddle.CPUPlace())
def init_data_format(self):
self.data_format = "NCHW"
......@@ -267,32 +281,46 @@ class TestDepthwiseConvNPU_Padding(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place, {'Input', 'Filter'},
- 'Output',
- max_relative_error=0.03,
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ self.place, {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=1.2)
+ else:
+ self.check_grad_with_place(
+ self.place, {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=0.03,
+ numeric_place=paddle.CPUPlace())
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place, ['Input'],
- 'Output',
- max_relative_error=0.03,
- no_grad_set=set(['Filter']),
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ self.place, ['Input'],
+ 'Output',
+ max_relative_error=0.7,
+ no_grad_set=set(['Filter']))
+ else:
+ self.check_grad_with_place(
+ self.place, ['Input'],
+ 'Output',
+ max_relative_error=0.03,
+ no_grad_set=set(['Filter']),
+ numeric_place=paddle.CPUPlace())
def test_check_grad_no_input(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place, ['Filter'],
- 'Output',
- max_relative_error=0.03,
- no_grad_set=set(['Input']),
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ self.place, ['Filter'],
+ 'Output',
+ max_relative_error=0.8,
+ no_grad_set=set(['Input']))
+ else:
+ self.check_grad_with_place(
+ self.place, ['Filter'],
+ 'Output',
+ max_relative_error=0.03,
+ no_grad_set=set(['Input']),
+ numeric_place=paddle.CPUPlace())
def init_data_format(self):
self.data_format = "NCHW"
......
......@@ -127,8 +127,6 @@ class TestConv2DOp(OpTest):
self.check_output_with_place(fluid.NPUPlace(0), atol=1e-2)
def test_check_grad(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
fluid.NPUPlace(0), {'Input', 'Filter'},
'Output',
......@@ -136,8 +134,6 @@ class TestConv2DOp(OpTest):
numeric_place=paddle.CPUPlace())
def test_check_grad_no_filter(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
fluid.NPUPlace(0), ['Input'],
'Output',
......@@ -146,8 +142,6 @@ class TestConv2DOp(OpTest):
numeric_place=paddle.CPUPlace())
def test_check_grad_no_input(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
fluid.NPUPlace(0), ['Filter'],
'Output',
......@@ -276,10 +270,13 @@ class TestConv2DOp_v2(OpTest):
def set_npu(self):
self.__class__.use_npu = True
+ def init_dtype(self):
+ self.dtype = np.float32
def setUp(self):
self.set_npu()
self.op_type = "conv2d"
- self.dtype = np.float32
+ self.init_dtype()
self.init_kernel_type()
self.init_group()
self.init_dilation()
......@@ -320,31 +317,45 @@ class TestConv2DOp_v2(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- paddle.NPUPlace(0), {'Input', 'Filter'},
- 'Output',
- max_relative_error=0.02,
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=1.1)
+ else:
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), {'Input', 'Filter'},
+ 'Output',
+ max_relative_error=0.02,
+ numeric_place=paddle.CPUPlace())
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- paddle.NPUPlace(0), ['Input'],
- 'Output',
- max_relative_error=0.02,
- no_grad_set=set(['Filter']),
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), ['Input'],
+ 'Output',
+ max_relative_error=0.99,
+ no_grad_set=set(['Filter']))
+ else:
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), ['Input'],
+ 'Output',
+ max_relative_error=0.02,
+ no_grad_set=set(['Filter']),
+ numeric_place=paddle.CPUPlace())
def test_check_grad_no_input(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- paddle.NPUPlace(0), ['Filter'],
- 'Output',
- no_grad_set=set(['Input']),
- numeric_place=paddle.CPUPlace())
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), ['Filter'],
+ 'Output',
+ max_relative_error=0.99,
+ no_grad_set=set(['Input']))
+ else:
+ self.check_grad_with_place(
+ paddle.NPUPlace(0), ['Filter'],
+ 'Output',
+ no_grad_set=set(['Input']),
+ numeric_place=paddle.CPUPlace())
def init_test_case(self):
self.pad = [0, 0]
......
......@@ -65,36 +65,59 @@ class TestElementwiseAddOp(OpTest):
self.check_output_with_place(self.place)
def test_check_grad_normal(self):
- if self.dtype == np.float16 or self.dtype == np.int64:
+ if self.dtype == np.int64:
return
- self.check_grad_with_place(
- self.place,
- ['X', 'Y'],
- 'Out',
- max_relative_error=0.006, )
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place,
+ ['X', 'Y'],
+ 'Out',
+ max_relative_error=0.15, )
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['X', 'Y'],
+ 'Out',
+ max_relative_error=0.006, )
def test_check_grad_ingore_x(self):
- if self.dtype == np.float16 or self.dtype == np.int64:
+ if self.dtype == np.int64:
return
- self.check_grad_with_place(
- self.place,
- ['Y'],
- 'Out',
- no_grad_set=set("X"),
- max_relative_error=0.006, )
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place,
+ ['Y'],
+ 'Out',
+ no_grad_set=set("X"),
+ max_relative_error=0.92, )
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['Y'],
+ 'Out',
+ no_grad_set=set("X"),
+ max_relative_error=0.006, )
def test_check_grad_ingore_y(self):
- if self.dtype == np.float16 or self.dtype == np.int64:
+ if self.dtype == np.int64:
return
- self.check_grad_with_place(
- self.place,
- ['X'],
- 'Out',
- no_grad_set=set("Y"),
- max_relative_error=0.006, )
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place,
+ ['X'],
+ 'Out',
+ no_grad_set=set("Y"),
+ max_relative_error=0.8, )
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['X'],
+ 'Out',
+ no_grad_set=set("Y"),
+ max_relative_error=0.006, )
class TestFP16ElementwiseAddOp(TestElementwiseAddOp):
......
......@@ -116,19 +116,13 @@ class TestElementwiseMaxOp(OpTest):
self.check_output_with_place(self.place)
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
self.place, ['Y'], 'Out', no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
self.place, ['X'], 'Out', no_grad_set=set("Y"))
......@@ -213,15 +207,11 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp):
self.out = np.maximum(self.x, self.y.reshape(1, 1, 100))
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
def test_check_grad_ingore_x(self):
- if self.dtype == np.float16:
- return
_, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['Y'],
......@@ -230,8 +220,6 @@ class TestElementwiseMaxOp_broadcast_2(TestElementwiseMaxOp):
user_defined_grads=[dy])
def test_check_grad_ingore_y(self):
- if self.dtype == np.float16:
- return
dx, _ = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X'],
......
......@@ -64,32 +64,41 @@ class TestElementwiseMinOp(OpTest):
def test_check_grad_normal(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place,
- ['X', 'Y'],
- 'Out', )
+ self.check_grad_with_place(
+ self.place, ['X', 'Y'], 'Out', max_relative_error=0.5)
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['X', 'Y'],
+ 'Out', )
def test_check_grad_ingore_x(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place,
- ['Y'],
- 'Out',
- no_grad_set=set("X"), )
+ self.check_grad_with_place(
+ self.place, ['Y'],
+ 'Out',
+ no_grad_set=set("X"),
+ max_relative_error=0.9)
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['Y'],
+ 'Out',
+ no_grad_set=set("X"), )
def test_check_grad_ingore_y(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place,
- ['X'],
- 'Out',
- no_grad_set=set("Y"), )
+ self.check_grad_with_place(
+ self.place, ['X'],
+ 'Out',
+ no_grad_set=set("Y"),
+ max_relative_error=0.1)
+ else:
+ self.check_grad_with_place(
+ self.place,
+ ['X'],
+ 'Out',
+ no_grad_set=set("Y"), )
class TestElementwiseMinOpFp16(TestElementwiseMinOp):
......
......@@ -114,8 +114,6 @@ class TestElementwisePow(OpTest):
self.out = np.power(self.x, self.y)
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
......@@ -184,8 +182,6 @@ class TestElementwisePowOp_broadcast_0(TestElementwisePow):
self.out = np.power(self.x, self.y)
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
......@@ -218,8 +214,6 @@ class TestElementwisePowOp_broadcast_1(TestElementwisePow):
self.out = np.power(self.x, self.y.reshape(1, 100, 1))
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
......@@ -252,8 +246,6 @@ class TestElementwisePowOp_broadcast_2(TestElementwisePow):
self.out = np.power(self.x, self.y.reshape(100, 1, 1))
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
dx, dy = ComputeGrad(self.x, self.y, self.out, self.axis)
self.check_grad_with_place(
self.place, ['X', 'Y'], 'Out', user_defined_grads=[dx, dy])
......
......@@ -34,7 +34,7 @@ class TestExpand(OpTest):
self.init_dtype()
np.random.seed(SEED)
- x = np.random.randn(3, 1, 7).astype(self.dtype)
+ x = np.random.randn(30, 1, 7).astype(self.dtype)
out = np.tile(x, [1, 10, 1])
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
......@@ -50,12 +50,8 @@ class TestExpand(OpTest):
def test_check_output(self):
self.check_output_with_place(self.place)
- # TODO(ascendrc): Add grad test
- # def test_check_grad(self):
- # if self.dtype == np.float16:
- # return
- # self.check_grad(['X'], 'Out')
- #
+ def test_check_grad(self):
+ self.check_grad(['X'], 'Out')
class TestExpandV2(TestExpand):
......@@ -66,7 +62,7 @@ class TestExpandV2(TestExpand):
self.init_dtype()
np.random.seed(SEED)
- x = np.random.randn(3, 1, 7).astype(self.dtype)
+ x = np.random.randn(30, 1, 7).astype(self.dtype)
out = np.tile(x, [1, 10, 1])
expand_times = np.array([1, 10, 1]).astype(np.int32)
......@@ -145,7 +141,7 @@ class TestExpand_expand_times_all_one(TestExpand):
self.init_dtype()
np.random.seed(SEED)
- x = np.random.randn(3, 1, 7).astype(self.dtype)
+ x = np.random.randn(30, 1, 7).astype(self.dtype)
out = np.tile(x, [1, 1, 1])
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
......
......@@ -81,13 +81,9 @@ class TestHuberLossOp(OpTest):
self.check_output_with_place(self.place)
def test_check_grad_normal(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
self.place, ['Y'],
'Out',
......@@ -95,8 +91,6 @@ class TestHuberLossOp(OpTest):
no_grad_set=set("residual"))
def test_check_grad_ingore_y(self):
- if self.dtype == np.float16:
- return
self.check_grad_with_place(
self.place, ['X'],
'Out',
......
......@@ -78,8 +78,10 @@ class TestLabelSmoothOp(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['X'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['X'], 'Out', max_relative_error=0.5)
+ else:
+ self.check_grad_with_place(self.place, ['X'], 'Out')
class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp):
......
......@@ -63,8 +63,10 @@ class TestLeadyRelu(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['X'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['X'], 'Out', max_relative_error=0.006)
+ else:
+ self.check_grad_with_place(self.place, ['X'], 'Out')
class TestLeadyReluFP16(TestLeadyRelu):
......
......@@ -63,9 +63,13 @@ class TestLogSoftmaxNPUOp(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(
- self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad])
+ self.check_grad_with_place(
+ self.place, ['X'], ['Out'],
+ user_defined_grads=[self.x_grad],
+ max_relative_error=0.02)
+ else:
+ self.check_grad_with_place(
+ self.place, ['X'], ['Out'], user_defined_grads=[self.x_grad])
def test_class(op_type, typename):
......
......@@ -77,8 +77,10 @@ class TestLookupTableV2(OpTest):
def test_check_grad(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['W'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['W'], 'Out', max_relative_error=0.01)
+ else:
+ self.check_grad_with_place(self.place, ['W'], 'Out')
class TestLookupTableV2FP16(TestLookupTableV2):
......
......@@ -39,10 +39,11 @@ class TestNearestInterpOp(OpTest):
self.set_npu()
self.out_size = None
self.actual_shape = None
+ self.init_dtype()
self.data_layout = 'NCHW'
self.init_test_case()
self.op_type = "nearest_interp_v2"
- input_np = np.random.random(self.input_shape).astype("float32")
+ input_np = np.random.random(self.input_shape).astype(self.dtype)
if self.data_layout == "NCHW":
in_h = self.input_shape[2]
......@@ -95,8 +96,21 @@ class TestNearestInterpOp(OpTest):
self.check_output_with_place(self.place)
def test_check_grad(self):
- self.check_grad_with_place(
- self.place, ['X'], 'Out', in_place=True, max_relative_error=0.006)
+ if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place, ['X'],
+ 'Out',
+ in_place=True,
+ max_relative_error=0.02)
+ else:
+ self.check_grad_with_place(
+ self.place, ['X'],
+ 'Out',
+ in_place=True,
+ max_relative_error=0.006)
+ def init_dtype(self):
+ self.dtype = np.float32
def init_test_case(self):
self.interp_method = 'nearest'
......@@ -108,6 +122,11 @@ class TestNearestInterpOp(OpTest):
self.align_corners = False
+ class TestNearestNeighborInterpFP16(TestNearestInterpOp):
+ def init_dtype(self):
+ self.dtype = np.float16
class TestNearestNeighborInterpCase1(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
......
......@@ -50,9 +50,10 @@ class TestPadOp(OpTest):
def test_check_grad_normal(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['X'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['X'], 'Out', max_relative_error=0.6)
+ else:
+ self.check_grad_with_place(self.place, ['X'], 'Out')
def set_npu(self):
self.__class__.use_npu = True
......
......@@ -34,11 +34,12 @@ class TestRelu(OpTest):
self.init_dtype()
np.random.seed(SEED)
- x = np.random.rand(3, 2).astype(self.dtype)
- out = x
- self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
- self.attrs = {}
+ x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+ # The same reason with TestAbs
+ x[np.abs(x) < 0.005] = 0.02
+ out = np.maximum(x, 0)
+ self.inputs = {'X': x}
self.outputs = {'Out': out}
def set_npu(self):
......@@ -50,32 +51,18 @@ class TestRelu(OpTest):
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
if self.dtype == np.float16:
+ self.check_grad_with_place(
+ self.place, ['X'], 'Out', max_relative_error=0.006)
+ else:
self.check_grad_with_place(self.place, ['X'], 'Out')
- class TestReluFp16(OpTest):
- def setUp(self):
- self.set_npu()
- self.op_type = "relu"
- self.place = paddle.NPUPlace(0)
- self.init_dtype()
- np.random.seed(SEED)
- x = np.random.rand(3, 2).astype(self.dtype)
- out = x
- self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
- self.attrs = {}
- self.outputs = {'Out': out}
- def set_npu(self):
- self.__class__.use_npu = True
- self.__class__.no_need_check_grad = True
+ class TestReluFp16(TestRelu):
def init_dtype(self):
self.dtype = np.float16
def test_check_output(self):
self.check_output_with_place(self.place, atol=1e-5)
class TestReluNeg(OpTest):
def setUp(self):
......
......@@ -58,12 +58,17 @@ class TestSliceOp(OpTest):
self.place = paddle.NPUPlace(0)
def test_check_output(self):
- self.check_output_with_place(self.place)
+ if self.dtype == np.float16:
+ self.check_output_with_place(self.place)
+ else:
+ self.check_output_with_place(self.place)
def test_check_grad_normal(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['Input'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['Input'], 'Out', max_relative_error=0.02)
+ else:
+ self.check_grad_with_place(self.place, ['Input'], 'Out')
class TestSliceOp2(TestSliceOp):
......@@ -347,8 +352,10 @@ class TestSliceOpDecsDim(OpTest):
def test_check_grad_normal(self):
if self.dtype == np.float16:
- return
- self.check_grad_with_place(self.place, ['Input'], 'Out')
+ self.check_grad_with_place(
+ self.place, ['Input'], 'Out', max_relative_error=0.5)
+ else:
+ self.check_grad_with_place(self.place, ['Input'], 'Out')
class TestSliceOpDecsDimFp16(TestSliceOpDecsDim):
......