From d4b44015e576e0242ccabd0e40570e39002b5967 Mon Sep 17 00:00:00 2001 From: Leo Chen <39020268+leo0519@users.noreply.github.com> Date: Thu, 23 Jun 2022 17:05:44 +0800 Subject: [PATCH] Fix elementwise_div UT by providing user defined gradients (#43536) --- .../unittests/test_elementwise_div_op.py | 399 +++++++++--------- 1 file changed, 197 insertions(+), 202 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index d522a9d0cde..98916c7a6ee 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,10 +15,10 @@ from __future__ import print_function import unittest import numpy as np -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 +import paddle +from paddle import fluid +from paddle.fluid import core class ElementwiseDivOp(OpTest): @@ -26,257 +26,266 @@ class ElementwiseDivOp(OpTest): def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide - self.dtype = np.float64 + self.init_args() self.init_dtype() - """ Warning - CPU gradient check error! - 'X': np.random.random((32,84)).astype("float32"), - 'Y': np.random.random((32,84)).astype("float32") - """ + self.init_shape() + + x = self.gen_data(self.x_shape).astype(self.val_dtype) + y = self.gen_data(self.y_shape).astype(self.val_dtype) + out = self.compute_output(x, y).astype(self.val_dtype) + grad_out = np.ones(out.shape).astype(self.val_dtype) + grad_x = self.compute_gradient_x(grad_out, y).astype(self.val_dtype) + grad_y = self.compute_gradient_y(grad_out, out, + y).astype(self.val_dtype) + + # Convert np.float32 data to np.uint16 for bfloat16 Paddle OP + if self.dtype == np.uint16: + x = convert_float_to_uint16(x) + y = convert_float_to_uint16(y) + out = convert_float_to_uint16(out) + grad_out = convert_float_to_uint16(grad_out) + grad_x = convert_float_to_uint16(grad_x) + grad_y = convert_float_to_uint16(grad_y) + + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': out} + self.grad_out = grad_out + self.grad_x = grad_x + self.grad_y = grad_y + + def init_args(self): + self.check_dygraph = True + self.place = None - self.inputs = { - 'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype), - 'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def init_dtype(self): + self.dtype = np.float64 + self.val_dtype = np.float64 - def check_eager(self): - return (not hasattr(self, "attrs") or (self.attrs["axis"] != -1)) + def init_shape(self): + self.x_shape = [13, 17] + self.y_shape = [13, 17] - def test_check_output(self): - self.check_output(check_eager=False) + def gen_data(self, shape): + return np.random.uniform(0.1, 1, shape) - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05) + def compute_output(self, x, y): + return x / y - def test_check_grad_ingore_x(self): - self.check_grad(['Y'], - 'Out', - max_relative_error=0.05, - no_grad_set=set("X")) + def compute_gradient_x(self, grad_out, y): + return grad_out / y - def test_check_grad_ingore_y(self): - self.check_grad(['X'], - 'Out', - max_relative_error=0.05, - no_grad_set=set('Y')) + def compute_gradient_y(self, grad_out, out, y): + return -1 * grad_out * out / y - def init_dtype(self): - pass + def test_check_output(self): + if self.place is None: + self.check_output() + else: + self.check_output_with_place(self.place) + + def test_check_gradient(self): + check_list = [] + check_list.append({ + 'grad': ['X', 'Y'], + 'no_grad': None, + 'val_grad': [self.grad_x, self.grad_y] + }) + check_list.append({ + 'grad': ['Y'], + 'no_grad': set('X'), + 'val_grad': [self.grad_y] + }) + check_list.append({ + 'grad': ['X'], + 'no_grad': set('Y'), + 'val_grad': [self.grad_x] + }) + for check_option in check_list: + check_args = [check_option['grad'], 'Out'] + check_kwargs = { + 'no_grad_set': check_option['no_grad'], + 'user_defined_grads': check_option['val_grad'], + 'user_defined_grad_outputs': [self.grad_out], + 'check_dygraph': self.check_dygraph + } + if self.place is None: + self.check_grad(*check_args, **check_kwargs) + else: + check_args.insert(0, self.place) + self.check_grad_with_place(*check_args, **check_kwargs) @unittest.skipIf(not core.is_compiled_with_cuda() or not core.is_bfloat16_supported(core.CUDAPlace(0)), - "core is not compiled with CUDA and not support the bfloat16") -class TestElementwiseDivOpBF16(OpTest): + "core is not compiled with CUDA or not support the bfloat16") +class TestElementwiseDivOpBF16(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide + def init_args(self): + # In due to output data type inconsistence of bfloat16 paddle op, we disable the dygraph check. + self.check_dygraph = False + self.place = core.CUDAPlace(0) + + def init_dtype(self): self.dtype = np.uint16 + self.val_dtype = np.float32 - x = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32) - y = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32) + def init_shape(self): + self.x_shape = [12, 13] + self.y_shape = [12, 13] - out = np.divide(x, y) - self.inputs = { - 'X': convert_float_to_uint16(x), - 'Y': convert_float_to_uint16(y) - } - self.outputs = {'Out': convert_float_to_uint16(out)} +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseDivOpScalar(ElementwiseDivOp): - def test_check_output(self): - place = core.CUDAPlace(0) - self.check_output_with_place(place) + def init_shape(self): + self.x_shape = [20, 3, 4] + self.y_shape = [1] - def test_check_grad_normal(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['X', 'Y'], 'Out') + def compute_gradient_y(self, grad_out, out, y): + return np.array([np.sum(-1 * grad_out * out / y)]) - def test_check_grad_ingore_x(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['Y'], 'Out', no_grad_set=set("X")) - def test_check_grad_ingore_y(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['X'], 'Out', no_grad_set=set('Y')) +class TestElementwiseDivOpVector(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [100] + self.y_shape = [100] -@skip_check_grad_ci( - reason="[skip shape check] Use y_shape(1) to test broadcast.") -class TestElementwiseDivOp_scalar(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(np.float64), - 'Y': np.random.uniform(0.1, 1, [1]).astype(np.float64) - } - self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']} +class TestElementwiseDivOpBroadcast0(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [100, 3, 4] + self.y_shape = [100] + self.attrs = {'axis': 0} -class TestElementwiseDivOp_Vector(ElementwiseDivOp): + def compute_output(self, x, y): + return x / y.reshape(100, 1, 1) - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(100, 1, 1) + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(100, 1, 1), axis=(1, 2)) -class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } +class TestElementwiseDivOpBroadcast1(ElementwiseDivOp): - self.attrs = {'axis': 0} - self.outputs = { - 'Out': np.divide(self.inputs['X'], - self.inputs['Y'].reshape(100, 1, 1)) - } + def init_shape(self): + self.x_shape = [2, 100, 4] + self.y_shape = [100] + self.attrs = {'axis': 1} + def compute_output(self, x, y): + return x / y.reshape(1, 100, 1) -class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 100, 1) - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 100, 4]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 100, 1), axis=(0, 2)) - self.attrs = {'axis': 1} - self.outputs = { - 'Out': np.divide(self.inputs['X'], - self.inputs['Y'].reshape(1, 100, 1)) - } +class TestElementwiseDivOpBroadcast2(ElementwiseDivOp): -class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 3, 100] + self.y_shape = [100] - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } + def compute_output(self, x, y): + return x / y.reshape(1, 1, 100) - self.outputs = { - 'Out': np.divide(self.inputs['X'], - self.inputs['Y'].reshape(1, 1, 100)) - } + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 1, 100) + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 1, 100), axis=(0, 1)) -class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [10, 12]).astype("float64") - } +class TestElementwiseDivOpBroadcast3(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 10, 12, 5] + self.y_shape = [10, 12] self.attrs = {'axis': 1} - self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 10, 12, 1)) - } + def compute_output(self, x, y): + return x / y.reshape(1, 10, 12, 1) -class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 10, 12, 1) - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 1, 50]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 10, 12, 1), + axis=(0, 3)) -class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): +class TestElementwiseDivOpBroadcast4(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def init_shape(self): + self.x_shape = [2, 3, 50] + self.y_shape = [2, 1, 50] + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(1)).reshape(2, 1, 50) -class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [1, 1, 100]).astype("float64"), - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} +class TestElementwiseDivOpBroadcast5(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 3, 4, 20] + self.y_shape = [2, 3, 1, 20] -class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(2)).reshape(2, 3, 1, 20) - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype("float64"), - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} +class TestElementwiseDivOpCommonuse1(ElementwiseDivOp): -class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 3, 100] + self.y_shape = [1, 1, 100] - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [10, 12]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype("float64"), - } + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(0, 1)).reshape(1, 1, 100) + + +class TestElementwiseDivOpCommonuse2(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [30, 3, 1, 5] + self.y_shape = [30, 1, 4, 1] + + def compute_gradient_x(self, grad_out, y): + return np.sum(grad_out / y, axis=(2)).reshape(30, 3, 1, 5) + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(1, 3)).reshape(30, 1, 4, 1) + + +class TestElementwiseDivOpXsizeLessThanYsize(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [10, 12] + self.y_shape = [2, 3, 10, 12] self.attrs = {'axis': 2} - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def compute_gradient_x(self, grad_out, y): + return np.sum(grad_out / y, axis=(0, 1)) -class TestElementwiseDivOp_INT(OpTest): +class TestElementwiseDivOpInt(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide + def init_dtype(self): self.dtype = np.int32 - self.init_dtype() - self.inputs = { - 'X': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype), - 'Y': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype) - } - self.outputs = {'Out': self.inputs['X'] // self.inputs['Y']} + self.val_dtype = np.int32 - def test_check_output(self): - self.check_output() + def gen_data(self, shape): + return np.random.randint(1, 5, size=shape) - def init_dtype(self): - pass + def compute_output(self, x, y): + return x // y @unittest.skipIf(not core.is_compiled_with_cuda(), @@ -285,21 +294,7 @@ class TestElementwiseDivOpFp16(ElementwiseDivOp): def init_dtype(self): self.dtype = np.float16 - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=1) - - def test_check_grad_ingore_x(self): - self.check_grad(['Y'], - 'Out', - max_relative_error=1, - no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - self.check_grad(['X'], - 'Out', - max_relative_error=1, - no_grad_set=set('Y')) + self.val_dtype = np.float16 class TestElementwiseDivBroadcast(unittest.TestCase): -- GitLab