From 26187c27776e4c094228f5c573f19dba2d55e6a3 Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Wed, 29 Jun 2022 13:56:37 +0800
Subject: [PATCH] Fix elementwise_div UT by providing user defined gradients (#43536) (#43909)

Cherry-pick of #43536

Background in #43262: in the elementwise_div UT, the numeric gradient (used for validation) has a large relative error compared with the analytic gradient (the Paddle OP):

- the default rtol for UTs is 0.005
- the rtol for the float32 and float64 elementwise_div OP is set to 0.05
- the rtol for the float16 and bfloat16 elementwise_div OP is set to 1.0

Because the relative error is too large, this PR provides user defined gradients, computed with the analytic method, to test elementwise_div.
---
 .../unittests/test_elementwise_div_op.py | 452 +++++++++---------
 1 file changed, 235 insertions(+), 217 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
index 27dbd3752b5..98916c7a6ee 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,277 +15,295 @@ from __future__ import print_function

 import unittest
 import numpy as np
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
 from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
+import paddle
+from paddle import fluid
+from paddle.fluid import core


 class ElementwiseDivOp(OpTest):
+
     def setUp(self):
         self.op_type = "elementwise_div"
         self.python_api = paddle.divide
-        self.dtype = np.float64
+        self.init_args()
         self.init_dtype()
-        """ Warning
-        CPU gradient check error!
-        'X': np.random.random((32,84)).astype("float32"),
-        'Y': np.random.random((32,84)).astype("float32")
-        """
+        self.init_shape()
+
+        x = self.gen_data(self.x_shape).astype(self.val_dtype)
+        y = self.gen_data(self.y_shape).astype(self.val_dtype)
+        out = self.compute_output(x, y).astype(self.val_dtype)
+        grad_out = np.ones(out.shape).astype(self.val_dtype)
+        grad_x = self.compute_gradient_x(grad_out, y).astype(self.val_dtype)
+        grad_y = self.compute_gradient_y(grad_out, out,
+                                         y).astype(self.val_dtype)
+
+        # Convert np.float32 data to np.uint16 for bfloat16 Paddle OP
+        if self.dtype == np.uint16:
+            x = convert_float_to_uint16(x)
+            y = convert_float_to_uint16(y)
+            out = convert_float_to_uint16(out)
+            grad_out = convert_float_to_uint16(grad_out)
+            grad_x = convert_float_to_uint16(grad_x)
+            grad_y = convert_float_to_uint16(grad_y)
+
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': out}
+        self.grad_out = grad_out
+        self.grad_x = grad_x
+        self.grad_y = grad_y
+
+    def init_args(self):
+        self.check_dygraph = True
+        self.place = None

-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def init_dtype(self):
+        self.dtype = np.float64
+        self.val_dtype = np.float64

-    def check_eager(self):
-        return (not hasattr(self, "attrs") or (self.attrs["axis"] != -1))
+    def init_shape(self):
+        self.x_shape = [13, 17]
+        self.y_shape = [13, 17]

-    def test_check_output(self):
-        self.check_output(check_eager=False)
-
-    def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05)
+    def gen_data(self, shape):
+        return np.random.uniform(0.1, 1, shape)

-    def test_check_grad_ingore_x(self):
-        self.check_grad(
-            ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X"))
+    def compute_output(self, x, y):
+        return x / y

-    def test_check_grad_ingore_y(self):
-        self.check_grad(
-            ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y

-    def init_dtype(self):
-        pass
+    def compute_gradient_y(self, grad_out, out, y):
+        return -1 * grad_out * out / y

+    def test_check_output(self):
+        if self.place is None:
+            self.check_output()
+        else:
+            self.check_output_with_place(self.place)
+
+    def test_check_gradient(self):
+        check_list = []
+        check_list.append({
+            'grad': ['X', 'Y'],
+            'no_grad': None,
+            'val_grad': [self.grad_x, self.grad_y]
+        })
+        check_list.append({
+            'grad': ['Y'],
+            'no_grad': set('X'),
+            'val_grad': [self.grad_y]
+        })
+        check_list.append({
+            'grad': ['X'],
+            'no_grad': set('Y'),
+            'val_grad': [self.grad_x]
+        })
+        for check_option in check_list:
+            check_args = [check_option['grad'], 'Out']
+            check_kwargs = {
+                'no_grad_set': check_option['no_grad'],
+                'user_defined_grads': check_option['val_grad'],
+                'user_defined_grad_outputs': [self.grad_out],
+                'check_dygraph': self.check_dygraph
+            }
+            if self.place is None:
+                self.check_grad(*check_args, **check_kwargs)
+            else:
+                check_args.insert(0, self.place)
+                self.check_grad_with_place(*check_args, **check_kwargs)
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda()
+                 or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+                 "core is not compiled with CUDA or not support the bfloat16")
+class TestElementwiseDivOpBF16(ElementwiseDivOp):
+
+    def init_args(self):
+        # Due to the output data type inconsistency of the bfloat16 Paddle OP, we disable the dygraph check.
+ self.check_dygraph = False + self.place = core.CUDAPlace(0) -@unittest.skipIf(not core.is_compiled_with_cuda() or - not core.is_bfloat16_supported(core.CUDAPlace(0)), - "core is not compiled with CUDA and not support the bfloat16") -class TestElementwiseDivOpBF16(OpTest): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide + def init_dtype(self): self.dtype = np.uint16 + self.val_dtype = np.float32 - x = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32) - y = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32) + def init_shape(self): + self.x_shape = [12, 13] + self.y_shape = [12, 13] - out = np.divide(x, y) - self.inputs = { - 'X': convert_float_to_uint16(x), - 'Y': convert_float_to_uint16(y) - } - self.outputs = {'Out': convert_float_to_uint16(out)} +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseDivOpScalar(ElementwiseDivOp): - def test_check_output(self): - place = core.CUDAPlace(0) - self.check_output_with_place(place) + def init_shape(self): + self.x_shape = [20, 3, 4] + self.y_shape = [1] - def test_check_grad_normal(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['X', 'Y'], 'Out') + def compute_gradient_y(self, grad_out, out, y): + return np.array([np.sum(-1 * grad_out * out / y)]) - def test_check_grad_ingore_x(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['Y'], 'Out', no_grad_set=set("X")) - def test_check_grad_ingore_y(self): - place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['X'], 'Out', no_grad_set=set('Y')) +class TestElementwiseDivOpVector(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [100] + self.y_shape = [100] -@skip_check_grad_ci( - reason="[skip shape check] Use y_shape(1) to test broadcast.") -class TestElementwiseDivOp_scalar(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(np.float64), - 'Y': np.random.uniform(0.1, 1, [1]).astype(np.float64) - } - self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']} +class TestElementwiseDivOpBroadcast0(ElementwiseDivOp): -class TestElementwiseDivOp_Vector(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def init_shape(self): + self.x_shape = [100, 3, 4] + self.y_shape = [100] + self.attrs = {'axis': 0} + def compute_output(self, x, y): + return x / y.reshape(100, 1, 1) -class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(100, 1, 1) - self.attrs = {'axis': 0} - self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) - } + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(100, 1, 1), axis=(1, 2)) -class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': 
np.random.uniform(0.1, 1, [2, 100, 4]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } +class TestElementwiseDivOpBroadcast1(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 100, 4] + self.y_shape = [100] self.attrs = {'axis': 1} - self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) - } + def compute_output(self, x, y): + return x / y.reshape(1, 100, 1) -class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [100]).astype("float64") - } + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 100, 1) - self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) - } + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 100, 1), axis=(0, 2)) -class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [10, 12]).astype("float64") - } +class TestElementwiseDivOpBroadcast2(ElementwiseDivOp): + def init_shape(self): + self.x_shape = [2, 3, 100] + self.y_shape = [100] + + def compute_output(self, x, y): + return x / y.reshape(1, 1, 100) + + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 1, 100) + + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 1, 100), axis=(0, 1)) + + +class TestElementwiseDivOpBroadcast3(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [2, 10, 12, 5] + self.y_shape = [10, 12] self.attrs = {'axis': 1} - self.outputs = { - 'Out': - np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 10, 12, 1)) - } + def compute_output(self, x, y): + return x / y.reshape(1, 10, 12, 1) -class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 1, 50]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def compute_gradient_x(self, grad_out, y): + return grad_out / y.reshape(1, 10, 12, 1) + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y.reshape(1, 10, 12, 1), + axis=(0, 3)) -class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype("float64") - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} +class TestElementwiseDivOpBroadcast4(ElementwiseDivOp): -class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [1, 1, 100]).astype("float64"), - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def init_shape(self): + self.x_shape = [2, 3, 50] + 
self.y_shape = [2, 1, 50] + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(1)).reshape(2, 1, 50) -class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype("float64"), - } - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} +class TestElementwiseDivOpBroadcast5(ElementwiseDivOp): -class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide - self.inputs = { - 'X': np.random.uniform(0.1, 1, [10, 12]).astype("float64"), - 'Y': np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype("float64"), - } + def init_shape(self): + self.x_shape = [2, 3, 4, 20] + self.y_shape = [2, 3, 1, 20] + + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(2)).reshape(2, 3, 1, 20) + +class TestElementwiseDivOpCommonuse1(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [2, 3, 100] + self.y_shape = [1, 1, 100] + + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(0, 1)).reshape(1, 1, 100) + + +class TestElementwiseDivOpCommonuse2(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [30, 3, 1, 5] + self.y_shape = [30, 1, 4, 1] + + def compute_gradient_x(self, grad_out, y): + return np.sum(grad_out / y, axis=(2)).reshape(30, 3, 1, 5) + + def compute_gradient_y(self, grad_out, out, y): + return np.sum(-1 * grad_out * out / y, axis=(1, 3)).reshape(30, 1, 4, 1) + + +class TestElementwiseDivOpXsizeLessThanYsize(ElementwiseDivOp): + + def init_shape(self): + self.x_shape = [10, 12] + self.y_shape = [2, 3, 10, 12] self.attrs = {'axis': 2} - self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + def compute_gradient_x(self, grad_out, y): + return np.sum(grad_out / y, axis=(0, 1)) -class TestElementwiseDivOp_INT(OpTest): - def setUp(self): - self.op_type = "elementwise_div" - self.python_api = paddle.divide +class TestElementwiseDivOpInt(ElementwiseDivOp): + + def init_dtype(self): self.dtype = np.int32 - self.init_dtype() - self.inputs = { - 'X': np.random.randint( - 1, 5, size=[13, 17]).astype(self.dtype), - 'Y': np.random.randint( - 1, 5, size=[13, 17]).astype(self.dtype) - } - self.outputs = {'Out': self.inputs['X'] // self.inputs['Y']} + self.val_dtype = np.int32 - def test_check_output(self): - self.check_output() + def gen_data(self, shape): + return np.random.randint(1, 5, size=shape) - def init_dtype(self): - pass + def compute_output(self, x, y): + return x // y @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") class TestElementwiseDivOpFp16(ElementwiseDivOp): + def init_dtype(self): self.dtype = np.float16 - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=1) - - def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=1, no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=1, no_grad_set=set('Y')) + self.val_dtype = np.float16 class TestElementwiseDivBroadcast(unittest.TestCase): + def test_shape_with_batch_sizes(self): with fluid.program_guard(fluid.Program()): - x_var = fluid.data( - name='x', dtype='float32', shape=[None, 3, None, 
None]) + x_var = fluid.data(name='x', + dtype='float32', + shape=[None, 3, None, None]) one = 2. out = one / x_var exe = fluid.Executor(fluid.CPUPlace()) @@ -295,6 +313,7 @@ class TestElementwiseDivBroadcast(unittest.TestCase): class TestDivideOp(unittest.TestCase): + def test_name(self): with fluid.program_guard(fluid.Program()): x = fluid.data(name="x", shape=[2, 3], dtype="float32") @@ -316,6 +335,7 @@ class TestDivideOp(unittest.TestCase): class TestComplexElementwiseDivOp(OpTest): + def setUp(self): self.op_type = "elementwise_div" self.python_api = paddle.divide @@ -352,30 +372,28 @@ class TestComplexElementwiseDivOp(OpTest): self.check_output(check_eager=False) def test_check_grad_normal(self): - self.check_grad( - ['X', 'Y'], - 'Out', - user_defined_grads=[self.grad_x, self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X', 'Y'], + 'Out', + user_defined_grads=[self.grad_x, self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - user_defined_grads=[self.grad_y], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['Y'], + 'Out', + no_grad_set=set("X"), + user_defined_grads=[self.grad_y], + user_defined_grad_outputs=[self.grad_out]) def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - user_defined_grads=[self.grad_x], - user_defined_grad_outputs=[self.grad_out]) + self.check_grad(['X'], + 'Out', + no_grad_set=set('Y'), + user_defined_grads=[self.grad_x], + user_defined_grad_outputs=[self.grad_out]) class TestRealComplexElementwiseDivOp(TestComplexElementwiseDivOp): + def init_input_output(self): self.x = np.random.random((2, 3, 4, 5)).astype(self.dtype) self.y = np.random.random( -- GitLab
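
For reference, the gradients supplied through user_defined_grads and user_defined_grad_outputs in the tests above follow directly from out = x / y: dout/dx = 1 / y and dout/dy = -x / y**2 = -out / y, summed over any broadcast axes of y. The short NumPy sketch below is not part of the patch; the helper name div_grads and the finite-difference spot check are illustrative assumptions for the same-shape case used by the base test.

import numpy as np


def div_grads(x, y, grad_out):
    # Analytic gradients of out = x / y for same-shape inputs:
    #   d(out)/dx = 1 / y         -> grad_x = grad_out / y
    #   d(out)/dy = -x / y**2     -> grad_y = -grad_out * out / y
    out = x / y
    grad_x = grad_out / y
    grad_y = -grad_out * out / y
    return grad_x, grad_y


rng = np.random.default_rng(0)
x = rng.uniform(0.1, 1.0, (13, 17))
y = rng.uniform(0.1, 1.0, (13, 17))
grad_out = np.ones_like(x)  # same all-ones grad_out as setUp() uses
grad_x, grad_y = div_grads(x, y, grad_out)

# Central-difference spot check of grad_x on a single element.
eps = 1e-6
x_p, x_m = x.copy(), x.copy()
x_p[0, 0] += eps
x_m[0, 0] -= eps
numeric = ((x_p / y)[0, 0] - (x_m / y)[0, 0]) / (2.0 * eps)
assert abs(numeric - grad_x[0, 0]) < 1e-6

For the broadcast cases (e.g. TestElementwiseDivOpBroadcast0 or the Commonuse classes), the same expressions are reduced with np.sum over the broadcast axes and reshaped back to y's shape, which is exactly what the compute_gradient_y overrides in the patch do.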