From 2c543193b192bf8386c6ce32bf2c228587ce73f5 Mon Sep 17 00:00:00 2001
From: Vvsmile <17864154871@163.com>
Date: Mon, 20 Mar 2023 14:42:09 +0800
Subject: [PATCH] Adjust tolerance with modi grad (#51791)

---
 .../fluid/tests/unittests/eager_op_test.py   | 184 ++++++++++++++++--
 .../paddle/fluid/tests/unittests/op_test.py  |   3 +-
 2 files changed, 167 insertions(+), 20 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/eager_op_test.py b/python/paddle/fluid/tests/unittests/eager_op_test.py
index 3aeb0bc87ce..737f0d7b9bc 100644
--- a/python/paddle/fluid/tests/unittests/eager_op_test.py
+++ b/python/paddle/fluid/tests/unittests/eager_op_test.py
@@ -332,6 +332,7 @@ class OpTest(unittest.TestCase):
         cls.dtype = None
         cls.outputs = {}
         cls.input_shape_is_large = True
+        cls.is_calc_ref = False
         cls.check_prim = False
 
         np.random.seed(123)
@@ -463,6 +464,7 @@ class OpTest(unittest.TestCase):
         # Make sure this function is called after calling infer_dtype_from_inputs_outputs.
         return (
             self.dtype == np.float16
+            or self.dtype == "float16"
             or (
                 hasattr(self, 'output_dtype')
                 and self.output_dtype == np.float16
@@ -492,6 +494,18 @@ class OpTest(unittest.TestCase):
             and self.attrs["use_xpu"]
         )
 
+    def is_fp16_compared_with_fp32(self):
+        return self.is_float16_op() and (
+            self.op_type
+            not in op_accuracy_white_list.NO_FP16_COMPARED_WITH_FP32_OP_LIST
+        )
+
+    def enable_cal_ref_output(self):
+        self.is_calc_ref = self.is_fp16_compared_with_fp32()
+
+    def disable_cal_ref_output(self):
+        self.is_calc_ref = False
+
     # set the self.output_dtype .
     def infer_dtype_from_inputs_outputs(self, inputs, outputs):
         def is_np_data(input):
@@ -565,19 +579,49 @@ class OpTest(unittest.TestCase):
                     tensor = core.LoDTensor()
                     if isinstance(np_value, tuple):
                         tensor.set(np_value[0], place)
-                        tensor.set_recursive_sequence_lengths(np_value[1])
+                        dtype = np.array(np_value[1]).dtype
+                        if self.is_calc_ref and dtype == np.float16:
+                            if isinstance(np_value[1], list):
+                                tensor.set_recursive_sequence_lengths(
+                                    np.array(np_value[1]).astype(np.float32)
+                                )
+                            else:
+                                tensor.set_recursive_sequence_lengths(
+                                    np_value[1].astype(np.float32)
+                                )
+                        else:
+                            tensor.set_recursive_sequence_lengths(np_value[1])
                     else:
-                        tensor.set(np_value, place)
+                        if self.is_calc_ref and np_value.dtype == np.float16:
+                            tensor.set(np_value.astype(np.float32), place)
+                        else:
+                            tensor.set(np_value, place)
                     feed_map[name] = tensor
             else:
                 tensor = core.LoDTensor()
                 if isinstance(self.inputs[var_name], tuple):
                     tensor.set(self.inputs[var_name][0], place)
-                    tensor.set_recursive_sequence_lengths(
-                        self.inputs[var_name][1]
-                    )
+                    if (
+                        self.is_calc_ref
+                        and self.inputs[var_name][1].dtype == np.float16
+                    ):
+                        tensor.set_recursive_sequence_lengths(
+                            self.inputs[var_name][1].astype(np.float32)
+                        )
+                    else:
+                        tensor.set_recursive_sequence_lengths(
+                            self.inputs[var_name][1]
+                        )
                 else:
-                    tensor.set(self.inputs[var_name], place)
+                    if (
+                        self.is_calc_ref
+                        and self.inputs[var_name].dtype == np.float16
+                    ):
+                        tensor.set(
+                            self.inputs[var_name].astype(np.float32), place
+                        )
+                    else:
+                        tensor.set(self.inputs[var_name], place)
                 feed_map[var_name] = tensor
 
         return feed_map
@@ -601,10 +645,10 @@ class OpTest(unittest.TestCase):
         else:
             self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
         inputs = append_input_output(
-            block, op_proto, self.inputs, True, self.dtype
+            block, op_proto, self.inputs, True, self.dtype, self.is_calc_ref
        )
         outputs = append_input_output(
-            block, op_proto, self.outputs, False, self.dtype
+            block, op_proto, self.outputs, False, self.dtype, self.is_calc_ref
         )
 
         if hasattr(self, "cache_name_list"):
@@ -724,7 +768,13 @@ class OpTest(unittest.TestCase):
     def append_input_output_for_dygraph(
         self, op_proto, np_list, is_input, if_return_inputs_grad_dict, block
     ):
-        def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
+        def create_var(
+            np_value,
+            name,
+            is_input,
+            if_return_inputs_grad_dict,
+            is_calc_ref=False,
+        ):
             np_value_temp = np_value
             has_lod = False
             lod_temp = None
@@ -734,7 +784,13 @@ class OpTest(unittest.TestCase):
                 lod_temp = np_value[1]
 
             if is_input:
-                v = self._create_var_from_numpy(np_value_temp)
+                if self.is_calc_ref and np_value_temp.dtype == np.float16:
+                    v = self._create_var_from_numpy(
+                        np_value_temp.astype(np.float32)
+                    )
+                else:
+                    v = self._create_var_from_numpy(np_value_temp)
+
                 if if_return_inputs_grad_dict:
                     v.stop_gradient = False
                     v.retain_grads()
@@ -744,13 +800,22 @@ class OpTest(unittest.TestCase):
                         lod_temp
                     )
             else:
-                v = block.create_var(
-                    name=name,
-                    dtype=np_value_temp.dtype,
-                    type=core.VarDesc.VarType.LOD_TENSOR,
-                    persistable=False,
-                    stop_gradient=False,
-                )
+                if self.is_calc_ref and np_value_temp.dtype == np.float16:
+                    v = block.create_var(
+                        name=name,
+                        dtype=np.float32,
+                        type=core.VarDesc.VarType.LOD_TENSOR,
+                        persistable=False,
+                        stop_gradient=False,
+                    )
+                else:
+                    v = block.create_var(
+                        name=name,
+                        dtype=np_value_temp.dtype,
+                        type=core.VarDesc.VarType.LOD_TENSOR,
+                        persistable=False,
+                        stop_gradient=False,
+                    )
             return v
 
         # prepare variable for input or output
@@ -779,7 +844,11 @@ class OpTest(unittest.TestCase):
                 slot_name = name
                 for (name, np_value) in np_list[name]:
                     v = create_var(
-                        np_value, name, is_input, if_return_inputs_grad_dict
+                        np_value,
+                        name,
+                        is_input,
+                        if_return_inputs_grad_dict,
+                        self.is_calc_ref,
                     )
                     var_list.append(v)
                     if if_return_inputs_grad_dict:
@@ -799,6 +868,7 @@ class OpTest(unittest.TestCase):
                     name_temp,
                     is_input,
                     if_return_inputs_grad_dict,
+                    self.is_calc_ref,
                 )
                 var_dict[name].append(v)
                 if if_return_inputs_grad_dict:
@@ -1457,6 +1527,19 @@ class OpTest(unittest.TestCase):
                 "Found failed {} {}".format(dygraph_outs.keys(), target_name),
             )
 
+        def find_imperative_expect(target_name, dygraph_outs, place):
+            for name in dygraph_outs:
+                if name == target_name:
+                    return dygraph_outs[name][0]
+                var_list = dygraph_outs[name]
+                for i, var in enumerate(var_list):
+                    if var.name == target_name:
+                        return dygraph_outs[name][i]
+            self.assertTrue(
+                False,
+                "Found failed {} {}".format(dygraph_outs.keys(), target_name),
+            )
+
         def find_actual(target_name, fetch_list):
             found = [
                 i
@@ -1468,6 +1551,17 @@ class OpTest(unittest.TestCase):
                 for i, var_name in enumerate(fetch_list)
                 if var_name == target_name
             ]
             self.assertTrue(
                 len(found) == 1, "Found {} {}".format(len(found), target_name)
             )
             return found[0]
 
+        def find_expect(target_name, fetch_list):
+            found = [
+                i
+                for i, var_name in enumerate(fetch_list)
+                if var_name == target_name
+            ]
+            self.assertTrue(
+                len(found) == 1, "Found {} {}".format(len(found), target_name)
+            )
+            return found[0]
+
         class Checker:
             """base class for check with self.outputs.
             currently don't support check between checkers.
@@ -1505,6 +1599,10 @@ class OpTest(unittest.TestCase):
                 """return: (actual_tensor(var_base), actual_numpy)"""
                 raise NotImplementedError("base class, not implement!")
 
+            def find_expect_value(self, name):
+                """return: (expect_tensor(var_base), actual_numpy)"""
+                raise NotImplementedError("base class, not implement!")
+
             def _compare_numpy(self, name, actual_np, expect_np):
                 self.op_test.assertTrue(
                     np.allclose(
@@ -1528,7 +1626,13 @@ class OpTest(unittest.TestCase):
 
             def compare_single_output_with_expect(self, name, expect):
                 actual, actual_np = self.find_actual_value(name)
-                expect_np = expect[0] if isinstance(expect, tuple) else expect
+                # expect_np = expect[0] if isinstance(expect, tuple) else expect
+                if self.op_test.is_fp16_compared_with_fp32():
+                    expect, expect_np = self.find_expect_value(name)
+                else:
+                    expect_np = (
+                        expect[0] if isinstance(expect, tuple) else expect
+                    )
                 actual_np, expect_np = self.convert_uint16_to_float_ifneed(
                     actual_np, expect_np
                 )
@@ -1580,6 +1684,14 @@ class OpTest(unittest.TestCase):
                 )
                 self.outputs = outs
                 self.fetch_list = fetch_list
+                if self.op_test.is_fp16_compared_with_fp32():
+                    self.op_test.enable_cal_ref_output()
+                    ref_outs, ref_fetch_list = self.op_test._calc_output(
+                        place, no_check_set=no_check_set
+                    )
+                    self.op_test.disable_cal_ref_output()
+                    self.ref_outputs = ref_outs
+                    self.ref_fetch_list = ref_fetch_list
 
             def find_actual_value(self, name):
                 idx = find_actual(name, self.fetch_list)
                 actual = self.outputs[idx]
                 actual_t = np.array(actual)
                 return actual, actual_t
 
+            def find_expect_value(self, name):
+                idx = find_expect(name, self.ref_fetch_list)
+                expect = self.ref_outputs[idx]
+                expect_t = np.array(expect)
+                return expect, expect_t
+
             def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
                 """
                 judge whether convert current output and expect to uint16.
@@ -1598,6 +1716,8 @@ class OpTest(unittest.TestCase):
                 ]:
                     actual_np = convert_uint16_to_float(actual_np)
                     self.rtol = 1.0e-2
+                elif actual_np.dtype == np.float16:
+                    self.rtol = 1.0e-3
                 else:
                     self.rtol = 1.0e-5
                 if (
@@ -1634,6 +1754,20 @@ class OpTest(unittest.TestCase):
                 )
                 self.outputs = dygraph_outs
 
+                if self.op_test.is_fp16_compared_with_fp32():
+                    self.op_test.enable_cal_ref_output()
+                    self.is_python_api_test = True
+                    ref_dygraph_outs = self.op_test._calc_python_api_output(
+                        place
+                    )
+                    if ref_dygraph_outs is None:
+                        self.is_python_api_test = False
+                        ref_dygraph_outs = self.op_test._calc_dygraph_output(
+                            place, no_check_set=no_check_set
+                        )
+                    self.ref_outputs = ref_dygraph_outs
+                    self.op_test.disable_cal_ref_output()
+
             def _compare_numpy(self, name, actual_np, expect_np):
                 if (
                     functools.reduce(lambda x, y: x * y, actual_np.shape, 1)
@@ -1665,6 +1799,8 @@ class OpTest(unittest.TestCase):
                     np.float64,
                 ]:
                     self.rtol = 1.0e-2
+                elif actual_np.dtype == np.float16:
+                    self.rtol = 1.0e-3
                 else:
                     self.rtol = 1.0e-5
                 if self.op_test.is_bfloat16_op():
@@ -1684,6 +1820,16 @@ class OpTest(unittest.TestCase):
                     )
                     return imperative_actual, imperative_actual_t
 
+            def find_expect_value(self, name):
+                with fluid.dygraph.base.guard(place=place):
+                    imperative_expect = find_imperative_expect(
+                        name, self.ref_outputs, place
+                    )
+                    imperative_expect_t = np.array(
+                        imperative_expect.value().get_tensor()
+                    )
+                    return imperative_expect, imperative_expect_t
+
             def _compare_list(self, name, actual, expect):
                 """if expect is a tuple, we need to compare list."""
                 with fluid.dygraph.base.guard(place=place):
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index cdb521fe258..9edf5565519 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -465,6 +465,7 @@ class OpTest(unittest.TestCase):
         # Make sure this function is called after calling infer_dtype_from_inputs_outputs.
         return (
             self.dtype == np.float16
+            or self.dtype == "float16"
             or (
                 hasattr(self, 'output_dtype')
                 and self.output_dtype == np.float16
@@ -1875,7 +1876,7 @@ class OpTest(unittest.TestCase):
             with _test_eager_guard():
                 return super().find_actual_value(name)
 
-            def find_expect_valur(self, name):
+            def find_expect_value(self, name):
                 with _test_eager_guard():
                     return super().find_expect_value(name)
 
-- 
GitLab
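
Reviewer note, not part of the patch: the sketch below restates the idea the diff implements, outside the OpTest framework. When the op under test produces float16 output, the checker re-runs the same inputs up-cast to float32 (bracketed by enable_cal_ref_output()/disable_cal_ref_output()), treats that float32 result as the expected value, and compares with a relaxed rtol of 1e-3; uint16-stored bfloat16 outputs keep rtol=1e-2 and full-precision outputs keep the default 1e-5. This is a self-contained approximation under the assumption of a toy elementwise kernel: run_op, select_rtol, and check_against_fp32_reference are hypothetical names, not Paddle APIs.

import numpy as np


def run_op(x, y):
    # Toy stand-in for a kernel under test; it computes in whatever dtype it
    # is fed, so float16 inputs emulate the low-precision kernel and float32
    # inputs emulate the reference run.
    return x * y


def select_rtol(actual_dtype):
    # Tolerance ladder analogous to convert_uint16_to_float_ifneed():
    # uint16-stored bfloat16 -> 1e-2, float16 -> 1e-3, everything else -> 1e-5.
    if actual_dtype == np.uint16:
        return 1.0e-2
    if actual_dtype == np.float16:
        return 1.0e-3
    return 1.0e-5


def check_against_fp32_reference(x_fp16, y_fp16):
    # Actual output: computed in float16, as the op under test would produce it.
    actual = run_op(x_fp16, y_fp16)
    # Expected output: the same inputs up-cast to float32 and run again,
    # mirroring the enable_cal_ref_output()/disable_cal_ref_output() bracket.
    expect = run_op(x_fp16.astype(np.float32), y_fp16.astype(np.float32))
    rtol = select_rtol(actual.dtype)
    np.testing.assert_allclose(actual.astype(np.float32), expect, rtol=rtol)


if __name__ == "__main__":
    rng = np.random.default_rng(123)
    # Inputs kept away from zero so a single float16 multiply stays within rtol=1e-3.
    x = rng.uniform(0.5, 2.0, size=(4, 8)).astype(np.float16)
    y = rng.uniform(0.5, 2.0, size=(4, 8)).astype(np.float16)
    check_against_fp32_reference(x, y)
    print("float16 output matched the float32 reference within rtol=1e-3")

The design choice mirrored here is that the reference is recomputed from the very same float16 inputs rather than from fresh float32 data, so the comparison isolates kernel rounding error instead of input-generation differences, and the tolerance only needs to absorb the precision lost inside the op itself.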