Unverified commit 2dec64d0, authored by Vvsmile, committed by GitHub

Change output and grad checks from fp16-vs-fp16 comparison to fp16-vs-fp32 comparison (#50700)

* change output and grad checks from fp16-vs-fp16 comparison to fp16-vs-fp32 comparison

* support fp16-vs-fp32 grad comparison

* the reference dtype is only changed from np.float16 to np.float32

* fix the case where a plain list cannot expose a dtype attribute, by converting the list to an array first

* adjust the default atol and rtol of float16 to 1e-3

* Polish code

* fix error

* fix

* Polish code

* fix the handling of _is_cal_ref with np.float16

* fix the combination of is_calc_ref and np.float16

* remove unused code in op_test.py

* fix CI

* fix the rtol set in the dygraph checker and eager checker

---------
Co-authored-by: ZzSean <18818272991@163.com>
Parent 8129c22e
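In short: when a test runs an operator in float16 and the op is not on the new NO_FP16_COMPARED_WITH_FP32_OP_LIST, the expected outputs and gradients are recomputed in float32 and the fp16 results are checked against that reference with rtol/atol 1e-3. A minimal, self-contained sketch of that idea (run_op below is a stand-in, not the OpTest API):

import numpy as np

def run_op(x, dtype):
    # stand-in for an operator kernel; the real harness executes the Paddle op
    return np.tanh(x.astype(dtype))

x = np.random.rand(4, 8).astype(np.float16)
actual = run_op(x, np.float16)                      # fp16 output under test
expect = run_op(x.astype(np.float32), np.float32)   # fp32 reference output

np.testing.assert_allclose(
    actual.astype(np.float32), expect, rtol=1e-3, atol=1e-3
)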
@@ -333,6 +333,7 @@ class OpTest(unittest.TestCase):
        cls.dtype = None
        cls.outputs = {}
        cls.input_shape_is_large = True
        cls.is_calc_ref = False
        cls.check_prim = False
        np.random.seed(123)
@@ -456,6 +457,26 @@ class OpTest(unittest.TestCase):
            )
        )

    def is_float16_op(self):
        # self.dtype is the dtype of inputs, and is set in infer_dtype_from_inputs_outputs.
        # Make sure this function is called after calling infer_dtype_from_inputs_outputs.
        return (
            self.dtype == np.float16
            or (
                hasattr(self, 'output_dtype')
                and self.output_dtype == np.float16
            )
            or (
                hasattr(self, 'mkldnn_data_type')
                and getattr(self, 'mkldnn_data_type') == "float16"
            )
            or (
                hasattr(self, 'attrs')
                and 'mkldnn_data_type' in self.attrs
                and self.attrs['mkldnn_data_type'] == 'float16'
            )
        )

    def is_mkldnn_op(self):
        return (hasattr(self, "use_mkldnn") and self.use_mkldnn) or (
            hasattr(self, "attrs")
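The new is_float16_op() predicate only inspects attributes already set on the test case. A hedged, standalone restatement of the same checks (the class and helper names below are illustrative, not part of the patch):

import numpy as np

# hypothetical test double exposing only the attributes is_float16_op() inspects
class _FakeFP16Test:
    dtype = np.float16   # inferred from the test inputs
    attrs = {}           # no mkldnn_data_type override

def looks_like_fp16(test):
    # same checks as is_float16_op(), written as a free function
    return (
        getattr(test, 'dtype', None) == np.float16
        or getattr(test, 'output_dtype', None) == np.float16
        or getattr(test, 'mkldnn_data_type', None) == "float16"
        or getattr(test, 'attrs', {}).get('mkldnn_data_type') == 'float16'
    )

assert looks_like_fp16(_FakeFP16Test())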
@@ -470,6 +491,18 @@ class OpTest(unittest.TestCase):
            and self.attrs["use_xpu"]
        )

    def is_fp16_compared_with_fp32(self):
        return self.is_float16_op() and (
            self.op_type
            not in op_accuracy_white_list.NO_FP16_COMPARED_WITH_FP32_OP_LIST
        )

    def enable_cal_ref_output(self):
        self.is_calc_ref = self.is_fp16_compared_with_fp32()

    def disable_cal_ref_output(self):
        self.is_calc_ref = False

    # set the self.output_dtype .
    def infer_dtype_from_inputs_outputs(self, inputs, outputs):
        def is_np_data(input):
@@ -543,7 +576,21 @@ class OpTest(unittest.TestCase):
                tensor = core.LoDTensor()
                if isinstance(np_value, tuple):
                    tensor.set(np_value[0], place)
                    dtype = np.array(np_value[1]).dtype
                    if self.is_calc_ref and dtype == np.float16:
                        if isinstance(np_value[1], list):
                            tensor.set_recursive_sequence_lengths(
                                np.array(np_value[1]).astype(np.float32)
                            )
                        else:
                            tensor.set_recursive_sequence_lengths(
                                np_value[1].astype(np.float32)
                            )
                    else:
                        tensor.set_recursive_sequence_lengths(np_value[1])
                else:
                    if self.is_calc_ref and np_value.dtype == np.float16:
                        tensor.set(np_value.astype(np.float32), place)
                    else:
                        tensor.set(np_value, place)
                feed_map[name] = tensor
@@ -551,9 +598,25 @@ class OpTest(unittest.TestCase):
                tensor = core.LoDTensor()
                if isinstance(self.inputs[var_name], tuple):
                    tensor.set(self.inputs[var_name][0], place)
                    if (
                        self.is_calc_ref
                        and self.inputs[var_name][1].dtype == np.float16
                    ):
                        tensor.set_recursive_sequence_lengths(
                            self.inputs[var_name][1].astype(np.float32)
                        )
                    else:
                        tensor.set_recursive_sequence_lengths(
                            self.inputs[var_name][1]
                        )
                else:
                    if (
                        self.is_calc_ref
                        and self.inputs[var_name].dtype == np.float16
                    ):
                        tensor.set(
                            self.inputs[var_name].astype(np.float32), place
                        )
                    else:
                        tensor.set(self.inputs[var_name], place)
                feed_map[var_name] = tensor
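The "list cannot infer dtype" bullet from the commit message corresponds to the `dtype = np.array(np_value[1]).dtype` line above: a plain Python list has no dtype attribute, so it is converted to an ndarray before its dtype is inspected. A minimal sketch of the same pattern (variable names are illustrative):

import numpy as np

np_value = (np.random.rand(5, 4).astype(np.float16), [[2, 3]])  # (data, lod) pair

lod_dtype = np.array(np_value[1]).dtype   # works whether the lod is a list or an ndarray
data = np_value[0]
if data.dtype == np.float16:
    data = data.astype(np.float32)        # fp32 copy fed to the reference program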
@@ -579,10 +642,10 @@ class OpTest(unittest.TestCase):
        else:
            self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)

        inputs = append_input_output(
            block, op_proto, self.inputs, True, self.dtype, self.is_calc_ref
        )
        outputs = append_input_output(
            block, op_proto, self.outputs, False, self.dtype, self.is_calc_ref
        )

        if hasattr(self, "cache_name_list"):
@@ -703,7 +766,13 @@ class OpTest(unittest.TestCase):
    def append_input_output_for_dygraph(
        self, op_proto, np_list, is_input, if_return_inputs_grad_dict, block
    ):
        def create_var(
            np_value,
            name,
            is_input,
            if_return_inputs_grad_dict,
            is_calc_ref=False,
        ):
            np_value_temp = np_value
            has_lod = False
            lod_temp = None
@@ -713,6 +782,11 @@ class OpTest(unittest.TestCase):
                lod_temp = np_value[1]

            if is_input:
                if is_calc_ref and np_value_temp.dtype == np.float16:
                    v = self._create_var_from_numpy(
                        np_value_temp.astype(np.float32)
                    )
                else:
                    v = self._create_var_from_numpy(np_value_temp)

                if if_return_inputs_grad_dict:
@@ -724,6 +798,15 @@ class OpTest(unittest.TestCase):
                    v.value().get_tensor().set_recursive_sequence_lengths(
                        lod_temp
                    )
            else:
                if is_calc_ref and np_value_temp.dtype == np.float16:
                    v = block.create_var(
                        name=name,
                        dtype=np.float32,
                        type=core.VarDesc.VarType.LOD_TENSOR,
                        persistable=False,
                        stop_gradient=False,
                    )
                else:
                    v = block.create_var(
                        name=name,
@@ -760,7 +843,11 @@ class OpTest(unittest.TestCase):
                slot_name = name
                for (name, np_value) in np_list[name]:
                    v = create_var(
                        np_value,
                        name,
                        is_input,
                        if_return_inputs_grad_dict,
                        self.is_calc_ref,
                    )
                    var_list.append(v)
                    if if_return_inputs_grad_dict:
@@ -780,6 +867,7 @@ class OpTest(unittest.TestCase):
                        name_temp,
                        is_input,
                        if_return_inputs_grad_dict,
                        self.is_calc_ref,
                    )
                    var_dict[name].append(v)
                    if if_return_inputs_grad_dict:
@@ -1438,6 +1526,19 @@ class OpTest(unittest.TestCase):
                "Found failed {} {}".format(dygraph_outs.keys(), target_name),
            )

        def find_imperative_expect(target_name, dygraph_outs, place):
            for name in dygraph_outs:
                if name == target_name:
                    return dygraph_outs[name][0]
                var_list = dygraph_outs[name]
                for i, var in enumerate(var_list):
                    if var.name == target_name:
                        return dygraph_outs[name][i]
            self.assertTrue(
                False,
                "Found failed {} {}".format(dygraph_outs.keys(), target_name),
            )

        def find_actual(target_name, fetch_list):
            found = [
                i
@@ -1449,6 +1550,17 @@ class OpTest(unittest.TestCase):
            )
            return found[0]

        def find_expect(target_name, fetch_list):
            found = [
                i
                for i, var_name in enumerate(fetch_list)
                if var_name == target_name
            ]
            self.assertTrue(
                len(found) == 1, "Found {} {}".format(len(found), target_name)
            )
            return found[0]

        class Checker:
            """base class for check with self.outputs.
            currently don't support check between checkers.
@@ -1486,6 +1598,10 @@ class OpTest(unittest.TestCase):
                """return: (actual_tensor(var_base), actual_numpy)"""
                raise NotImplementedError("base class, not implement!")

            def find_expect_value(self, name):
                """return: (expect_tensor(var_base), actual_numpy)"""
                raise NotImplementedError("base class, not implement!")

            def _compare_numpy(self, name, actual_np, expect_np):
                self.op_test.assertTrue(
                    np.allclose(
@@ -1509,10 +1625,17 @@ class OpTest(unittest.TestCase):
            def compare_single_output_with_expect(self, name, expect):
                actual, actual_np = self.find_actual_value(name)
                if self.op_test.is_fp16_compared_with_fp32():
                    expect, expect_np = self.find_expect_value(name)
                else:
                    expect_np = (
                        expect[0] if isinstance(expect, tuple) else expect
                    )
                actual_np, expect_np = self.convert_uint16_to_float_ifneed(
                    actual_np, expect_np
                )
                # modify there for fp32 check
                # NOTE(zhiqiu): np.allclose([], [1.]) returns True
                # see details: https://stackoverflow.com/questions/38331703/why-does-numpys-broadcasting-sometimes-allow-comparing-arrays-of-different-leng
                if expect_np.size == 0:
@@ -1561,6 +1684,14 @@ class OpTest(unittest.TestCase):
                )
                self.outputs = outs
                self.fetch_list = fetch_list
                if self.op_test.is_fp16_compared_with_fp32():
                    self.op_test.enable_cal_ref_output()
                    ref_outs, ref_fetch_list = self.op_test._calc_output(
                        place, no_check_set=no_check_set
                    )
                    self.op_test.disable_cal_ref_output()
                    self.ref_outputs = ref_outs
                    self.ref_fetch_list = ref_fetch_list

            def find_actual_value(self, name):
                idx = find_actual(name, self.fetch_list)
@@ -1568,6 +1699,12 @@ class OpTest(unittest.TestCase):
                actual_t = np.array(actual)
                return actual, actual_t

            def find_expect_value(self, name):
                idx = find_expect(name, self.ref_fetch_list)
                expect = self.ref_outputs[idx]
                expect_t = np.array(expect)
                return expect, expect_t

            def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
                """
                judge whether convert current output and expect to uint16.
@@ -1579,6 +1716,8 @@ class OpTest(unittest.TestCase):
                ]:
                    actual_np = convert_uint16_to_float(actual_np)
                    self.rtol = 1.0e-2
                elif actual_np.dtype == np.float16:
                    self.rtol = 1.0e-3
                else:
                    self.rtol = 1.0e-5
                if (
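The default rtol now depends on the output dtype: roughly 1e-2 for bfloat16 (stored as uint16), 1e-3 for float16, and 1e-5 otherwise. A hedged standalone restatement of that policy (the helper name is illustrative):

import numpy as np

def default_rtol(actual_dtype):
    if actual_dtype == np.uint16:      # Paddle stores bfloat16 outputs as uint16
        return 1.0e-2
    if actual_dtype == np.float16:
        return 1.0e-3
    return 1.0e-5

assert default_rtol(np.dtype(np.float16)) == 1.0e-3
assert default_rtol(np.dtype(np.float64)) == 1.0e-5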
@@ -1607,6 +1746,12 @@ class OpTest(unittest.TestCase):
                self.outputs = self.op_test._calc_dygraph_output(
                    place, no_check_set=no_check_set
                )
                if self.op_test.is_fp16_compared_with_fp32():
                    self.op_test.enable_cal_ref_output()
                    self.ref_outputs = self.op_test._calc_dygraph_output(
                        place, no_check_set=no_check_set
                    )
                    self.op_test.disable_cal_ref_output()

            def find_actual_value(self, name):
                with fluid.dygraph.base.guard(place=place):
@@ -1618,12 +1763,24 @@ class OpTest(unittest.TestCase):
                    )
                    return imperative_actual, imperative_actual_t

            def find_expect_value(self, name):
                with fluid.dygraph.base.guard(place=place):
                    imperative_expect = find_imperative_expect(
                        name, self.ref_outputs, place
                    )
                    imperative_expect_t = np.array(
                        imperative_expect.value().get_tensor()
                    )
                    return imperative_expect, imperative_expect_t

            def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
                if actual_np.dtype == np.uint16 and expect_np.dtype in [
                    np.float32,
                    np.float64,
                ]:
                    self.rtol = 1.0e-2
                elif actual_np.dtype == np.float16:
                    self.rtol = 1.0e-3
                else:
                    self.rtol = 1.0e-5
                if self.op_test.is_bfloat16_op():
@@ -1692,6 +1849,23 @@ class OpTest(unittest.TestCase):
                    )
                self.outputs = eager_dygraph_outs
                if self.op_test.is_fp16_compared_with_fp32():
                    self.op_test.enable_cal_ref_output()
                    with _test_eager_guard():
                        self.is_python_api_test = True
                        ref_eager_dygraph_outs = (
                            self.op_test._calc_python_api_output(place)
                        )
                        if eager_dygraph_outs is None:
                            self.is_python_api_test = False
                            ref_eager_dygraph_outs = (
                                self.op_test._calc_dygraph_output(
                                    place, no_check_set=no_check_set
                                )
                            )
                    self.op_test.disable_cal_ref_output()
                    self.ref_outputs = ref_eager_dygraph_outs

            def _compare_numpy(self, name, actual_np, expect_np):
                with _test_eager_guard():
                    super()._compare_numpy(name, actual_np, expect_np)
@@ -1706,6 +1880,10 @@ class OpTest(unittest.TestCase):
                with _test_eager_guard():
                    return super().find_actual_value(name)

            def find_expect_valur(self, name):
                with _test_eager_guard():
                    return super().find_expect_value(name)

            def _compare_list(self, name, actual, expect):
                """if expect is a tuple, we need to compare list."""
                with _test_eager_guard():
@@ -1743,6 +1921,9 @@ class OpTest(unittest.TestCase):
            else:
                atol = 1e-1

        if self.is_float16_op():
            atol = 1e-3

        if no_check_set is not None:
            if (
                self.op_type
@@ -1951,6 +2132,7 @@ class OpTest(unittest.TestCase):
        # the value of np.abs(a) is between 1e-10 and 1e-8, we set np.abs(a)*=1e4.
        # Therefore, it asserts np.abs(a - b) / (np.abs(a)*1e4) < max_relative_error,
        # which is the same as np.abs(a - b) / np.abs(a) < max_relative_error*1e4.

        abs_a = np.abs(a)
        if abs_a.ndim > 0:
            if (
@@ -2171,6 +2353,18 @@ class OpTest(unittest.TestCase):
            )
            for input_to_check in inputs_to_check
        ]

        if self.is_fp16_compared_with_fp32():
            self.enable_cal_ref_output()
            numeric_grads = self._get_gradient(
                inputs_to_check,
                place,
                output_names,
                no_grad_set,
                user_defined_grad_outputs,
            )
            self.disable_cal_ref_output()

        analytic_grads = self._get_gradient(
            inputs_to_check,
            place,
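Conceptually, the gradient check now compares the analytic fp16 gradient against a gradient recomputed from the fp32 reference program instead of against a purely fp16 numeric estimate. A rough standalone sketch of that idea (grad_of_square and the tolerances are illustrative, not the _get_gradient API):

import numpy as np

def grad_of_square(x):
    # toy analytic gradient of y = x ** 2
    return 2.0 * x

x16 = np.random.rand(3).astype(np.float16)
analytic = grad_of_square(x16)                        # fp16 gradient under test
reference = grad_of_square(x16.astype(np.float32))    # fp32 reference gradient

np.testing.assert_allclose(
    analytic.astype(np.float32), reference, rtol=1e-3, atol=1e-3
)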
......
@@ -94,11 +94,13 @@ def set_input(scope, op, inputs, place):
            __set_input__(in_name, inputs[in_name])


def append_input_output(
    block, op_proto, np_list, is_input, dtype, is_calc_ref=False
):
    '''Insert VarDesc and generate Python variable instance'''
    proto_list = op_proto.inputs if is_input else op_proto.outputs

    def create_var(block, name, np_list, var_proto, is_calc_ref=False):
        dtype = None
        shape = None
        lod_level = None
@@ -118,6 +120,8 @@ def append_input_output(block, op_proto, np_list, is_input, dtype):
        if is_input:
            shape = list(np_value.shape)
            lod_level = 0
        if is_calc_ref and dtype == np.float16:
            dtype = np.float32
        return block.create_var(
            dtype=dtype, shape=shape, lod_level=lod_level, name=name
        )
@@ -138,11 +142,15 @@ def append_input_output(block, op_proto, np_list, is_input, dtype):
            var_list = []
            for (name, np_value) in np_list[var_name]:
                var_list.append(
                    create_var(
                        block, name, {name: np_value}, var_proto, is_calc_ref
                    )
                )
            var_dict[var_name] = var_list
        else:
            var_dict[var_name] = create_var(
                block, var_name, np_list, var_proto, is_calc_ref
            )

    return var_dict
......
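During the reference pass (is_calc_ref=True), an fp16 variable is declared as fp32 in the program description. The dtype switch in create_var reduces to a tiny helper; a hedged restatement (the function name is illustrative):

import numpy as np

def ref_var_dtype(dtype, is_calc_ref):
    # fp16 variables are promoted to fp32 only while building the reference program
    if is_calc_ref and dtype == np.float16:
        return np.float32
    return dtype

assert ref_var_dtype(np.float16, is_calc_ref=True) is np.float32
assert ref_var_dtype(np.float16, is_calc_ref=False) is np.float16
assert ref_var_dtype(np.float64, is_calc_ref=True) is np.float64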
@@ -87,3 +87,8 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
    'softmax',
    'conv2d_transpose',
]

NO_FP16_COMPARED_WITH_FP32_OP_LIST = [
    'fake_quantize_moving_average_abs_max',
    'p_norm',
]
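Ops on this list keep the old fp16-vs-fp16 comparison; every other op that runs in float16 is compared against the fp32 reference. A small illustration of the gating logic, mirroring is_fp16_compared_with_fp32 above:

NO_FP16_COMPARED_WITH_FP32_OP_LIST = [
    'fake_quantize_moving_average_abs_max',
    'p_norm',
]

def compares_against_fp32(op_type, is_float16_op):
    # only fp16 ops outside the white list get the fp32 reference comparison
    return is_float16_op and (
        op_type not in NO_FP16_COMPARED_WITH_FP32_OP_LIST
    )

assert compares_against_fp32('softmax', True)
assert not compares_against_fp32('p_norm', True)
assert not compares_against_fp32('softmax', False)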