From 8077d79a9ded8ad41bed45573037fd081cddbc94 Mon Sep 17 00:00:00 2001 From: Qi Shao <17864154871@163.com> Date: Fri, 16 Jun 2023 10:54:10 +0800 Subject: [PATCH] [Cherry-Pick] Modify the bf16 accuracy checking framework in OpTest (#54658) * modify the bf16 accuracy checking framework in OpTest * modify the bf16 accuracy checking framework in OpTest * modify the bf16 accuracy checking framework in OpTest * modify the bf16 accuracy checking framework in OpTest * modify the bf16 accuracy checking framework in OpTest * modify the bf16 accuracy checking framework in OpTest --- test/legacy_test/eager_op_test.py | 122 ++++++++++++++++------ test/legacy_test/testsuite.py | 2 +- test/white_list/op_accuracy_white_list.py | 8 ++ 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/test/legacy_test/eager_op_test.py b/test/legacy_test/eager_op_test.py index b809d371c25..28763f31100 100644 --- a/test/legacy_test/eager_op_test.py +++ b/test/legacy_test/eager_op_test.py @@ -550,8 +550,17 @@ class OpTest(unittest.TestCase): not in op_accuracy_white_list.NO_FP16_COMPARED_WITH_FP32_OP_LIST ) + def is_bf16_compared_with_fp32(self): + return self.is_bfloat16_op() and ( + self.op_type + not in op_accuracy_white_list.NO_BF16_COMPARED_WITH_FP32_OP_LIST + ) + def enable_cal_ref_output(self): - self.is_calc_ref = self.is_fp16_compared_with_fp32() + self.is_calc_ref = ( + self.is_fp16_compared_with_fp32() + or self.is_bf16_compared_with_fp32() + ) def disable_cal_ref_output(self): self.is_calc_ref = False @@ -652,20 +661,47 @@ class OpTest(unittest.TestCase): if isinstance(np_value, tuple): tensor.set(np_value[0], place) dtype = np.array(np_value[1]).dtype - if self.is_calc_ref and dtype == np.float16: - if isinstance(np_value[1], list): - tensor.set_recursive_sequence_lengths( - np.array(np_value[1]).astype(np.float32) - ) + + if self.is_calc_ref: + # convert the float16 to float by numpy.astype + if dtype == np.float16: + if isinstance(np_value[1], list): + tensor.set_recursive_sequence_lengths( + np.array(np_value[1]).astype(np.float32) + ) + else: + tensor.set_recursive_sequence_lengths( + np_value[1].astype(np.float32) + ) + # convert the bfloat16 to float by convert_uint16_to_float + # provided in this file + elif dtype == np.uint16: + if isinstance(np_value[1], list): + tensor.set_recursive_sequence_lengths( + convert_uint16_to_float( + np.array(np_value[1]) + ) + ) + else: + tensor.set_recursive_sequence_lengths( + convert_uint16_to_float(np_value[1]) + ) else: tensor.set_recursive_sequence_lengths( - np_value[1].astype(np.float32) + np_value[1] ) else: tensor.set_recursive_sequence_lengths(np_value[1]) else: - if self.is_calc_ref and np_value.dtype == np.float16: - tensor.set(np_value.astype(np.float32), place) + if self.is_calc_ref: + if np_value.dtype == np.float16: + tensor.set(np_value.astype(np.float32), place) + elif np_value.dtype == np.uint16: + tensor.set( + convert_uint16_to_float(np_value), place + ) + else: + tensor.set(np_value, place) else: tensor.set(np_value, place) feed_map[name] = tensor @@ -673,25 +709,38 @@ class OpTest(unittest.TestCase): tensor = core.LoDTensor() if isinstance(self.inputs[var_name], tuple): tensor.set(self.inputs[var_name][0], place) - if ( - self.is_calc_ref - and self.inputs[var_name][1].dtype == np.float16 - ): - tensor.set_recursive_sequence_lengths( - self.inputs[var_name][1].astype(np.float32) - ) + if self.is_calc_ref: + if self.inputs[var_name][1].dtype == np.float16: + tensor.set_recursive_sequence_lengths( + 
self.inputs[var_name][1].astype(np.float32) + ) + elif self.inputs[var_name][1].dtype == np.uint16: + tensor.set_recursive_sequence_lengths( + convert_uint16_to_float( + self.inputs[var_name][1] + ) + ) + else: + tensor.set_recursive_sequence_lengths( + self.inputs[var_name][1] + ) else: tensor.set_recursive_sequence_lengths( self.inputs[var_name][1] ) else: - if ( - self.is_calc_ref - and self.inputs[var_name].dtype == np.float16 - ): - tensor.set( - self.inputs[var_name].astype(np.float32), place - ) + if self.is_calc_ref: + if self.inputs[var_name].dtype == np.float16: + tensor.set( + self.inputs[var_name].astype(np.float32), place + ) + elif self.inputs[var_name].dtype == np.uint16: + tensor.set( + convert_uint16_to_float(self.inputs[var_name]), + place, + ) + else: + tensor.set(self.inputs[var_name], place) else: tensor.set(self.inputs[var_name], place) feed_map[var_name] = tensor @@ -1761,7 +1810,10 @@ class OpTest(unittest.TestCase): def compare_single_output_with_expect(self, name, expect): actual, actual_np = self.find_actual_value(name) # expect_np = expect[0] if isinstance(expect, tuple) else expect - if self.op_test.is_fp16_compared_with_fp32(): + if ( + self.op_test.is_fp16_compared_with_fp32() + or self.op_test.is_bf16_compared_with_fp32() + ): expect, expect_np = self.find_expect_value(name) else: expect_np = ( @@ -1816,7 +1868,10 @@ class OpTest(unittest.TestCase): ) self.outputs = outs self.fetch_list = fetch_list - if self.op_test.is_fp16_compared_with_fp32(): + if ( + self.op_test.is_fp16_compared_with_fp32() + or self.op_test.is_bf16_compared_with_fp32() + ): self.op_test.enable_cal_ref_output() ref_outs, ref_fetch_list = self.op_test._calc_output( place, no_check_set=no_check_set @@ -1883,7 +1938,10 @@ class OpTest(unittest.TestCase): place, no_check_set=no_check_set ) self.outputs = dygraph_outs - if self.op_test.is_fp16_compared_with_fp32(): + if ( + self.op_test.is_fp16_compared_with_fp32() + or self.op_test.is_bf16_compared_with_fp32() + ): self.op_test.enable_cal_ref_output() self.is_python_api_test = True self.ref_outputs = self.op_test._calc_python_api_output( @@ -2228,9 +2286,8 @@ class OpTest(unittest.TestCase): atol=atol, equal_nan=False, err_msg=( - "Operator %s error, %s variable %s (shape: %s, dtype: %s) max gradient diff over limit" - ) - % ( + "Operator {} error, {} variable {} (shape: {}, dtype: {}) max gradient diff over limit" + ).format( self.op_type, msg_prefix, name, @@ -2486,7 +2543,10 @@ class OpTest(unittest.TestCase): if numeric_place is None: numeric_place = place - if user_defined_grads is None and self.is_fp16_compared_with_fp32(): + if user_defined_grads is None and ( + self.is_fp16_compared_with_fp32() + or self.is_bf16_compared_with_fp32() + ): self.enable_cal_ref_output() numeric_grads = self._get_gradient( inputs_to_check, @@ -2769,7 +2829,7 @@ class OpTest(unittest.TestCase): feed_dict = self.feed_var(inputs, place) if user_defined_grad_outputs is None: - if self.dtype == np.uint16: + if self.dtype == np.uint16 and not self.is_calc_ref: cast_inputs = list(map(block.var, output_names)) if self.op_type in ["broadcast_tensors", "meshgrid"]: output_names = self.cast_bf16_output(block, cast_inputs) diff --git a/test/legacy_test/testsuite.py b/test/legacy_test/testsuite.py index d4ab629ea2b..9f724ee22d4 100644 --- a/test/legacy_test/testsuite.py +++ b/test/legacy_test/testsuite.py @@ -120,7 +120,7 @@ def append_input_output( if is_input: shape = list(np_value.shape) lod_level = 0 - if is_calc_ref and dtype == np.float16: + if is_calc_ref and 
(dtype == np.float16 or dtype == np.uint16): dtype = np.float32 return block.create_var( dtype=dtype, shape=shape, lod_level=lod_level, name=name diff --git a/test/white_list/op_accuracy_white_list.py b/test/white_list/op_accuracy_white_list.py index d7613f7b284..1f539fc3c7b 100644 --- a/test/white_list/op_accuracy_white_list.py +++ b/test/white_list/op_accuracy_white_list.py @@ -94,3 +94,11 @@ NO_FP16_COMPARED_WITH_FP32_OP_LIST = [ 'fake_quantize_moving_average_abs_max', 'p_norm', ] + + +NO_BF16_COMPARED_WITH_FP32_OP_LIST = [ + 'unique', + 'fusion_gru', + 'fusion_lstm', + 'dequantize', +] -- GitLab
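
Note on the conversion used above: the reference-input path added in this patch calls convert_uint16_to_float (a helper already defined in eager_op_test.py) to widen bfloat16 inputs, which OpTest stores as raw uint16 bit patterns, to float32 before computing the fp32 reference. As a rough sketch of the idea only, not the exact Paddle implementation, such a conversion can shift the 16 bfloat16 bits into the upper half of a 32-bit word and reinterpret the result as float32:

    import numpy as np

    def bf16_bits_to_float32(x):
        # Hypothetical stand-in for convert_uint16_to_float: `x` holds bfloat16
        # values as raw uint16 bit patterns. bfloat16 keeps the upper 16 bits of
        # an IEEE-754 float32, so widening to uint32, shifting left by 16 and
        # reinterpreting the bytes recovers the original float32 value.
        bits = np.asarray(x, dtype=np.uint16).astype(np.uint32) << 16
        return bits.view(np.float32)

    # 0x3F80 is the bfloat16 bit pattern for 1.0, 0x4000 for 2.0
    print(bf16_bits_to_float32(np.array([0x3F80, 0x4000], dtype=np.uint16)))  # [1. 2.]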
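
For context, the net effect of the framework change is that a bfloat16 op whose type is not listed in NO_BF16_COMPARED_WITH_FP32_OP_LIST is now checked against outputs recomputed in float32, mirroring the existing fp16-vs-fp32 path. The snippet below is a simplified, hypothetical illustration of that policy; the function name and tolerances are placeholders rather than OpTest internals, and only the whitelist contents are taken from the patch.

    import numpy as np

    NO_BF16_COMPARED_WITH_FP32_OP_LIST = ['unique', 'fusion_gru', 'fusion_lstm', 'dequantize']

    def compare_with_fp32_reference(op_type, actual_fp32, expect_fp32, rtol=1e-2, atol=1e-2):
        # Hypothetical sketch: for non-whitelisted bf16 ops, compare the op output
        # (already widened to float32) against a reference computed in float32.
        # Whitelisted ops keep the original bf16-vs-bf16 check and are skipped here.
        if op_type in NO_BF16_COMPARED_WITH_FP32_OP_LIST:
            return
        np.testing.assert_allclose(actual_fp32, expect_fp32, rtol=rtol, atol=atol)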