From d4b44015e576e0242ccabd0e40570e39002b5967 Mon Sep 17 00:00:00 2001
From: Leo Chen <39020268+leo0519@users.noreply.github.com>
Date: Thu, 23 Jun 2022 17:05:44 +0800
Subject: [PATCH] Fix elementwise_div UT by providing user defined gradients
 (#43536)

---
 .../unittests/test_elementwise_div_op.py      | 399 +++++++++---------
 1 file changed, 197 insertions(+), 202 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
index d522a9d0cd..98916c7a6e 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#  Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,10 +15,10 @@
 from __future__ import print_function
 import unittest
 import numpy as np
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
 from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
+import paddle
+from paddle import fluid
+from paddle.fluid import core
 
 
 class ElementwiseDivOp(OpTest):
@@ -26,257 +26,266 @@ class ElementwiseDivOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_div"
         self.python_api = paddle.divide
-        self.dtype = np.float64
+        self.init_args()
         self.init_dtype()
-        """ Warning
-        CPU gradient check error!
-        'X': np.random.random((32,84)).astype("float32"),
-        'Y': np.random.random((32,84)).astype("float32")
-        """
+        self.init_shape()
+
+        x = self.gen_data(self.x_shape).astype(self.val_dtype)
+        y = self.gen_data(self.y_shape).astype(self.val_dtype)
+        out = self.compute_output(x, y).astype(self.val_dtype)
+        grad_out = np.ones(out.shape).astype(self.val_dtype)
+        grad_x = self.compute_gradient_x(grad_out, y).astype(self.val_dtype)
+        grad_y = self.compute_gradient_y(grad_out, out,
+                                         y).astype(self.val_dtype)
+
+        # Convert np.float32 data to np.uint16 for bfloat16 Paddle OP
+        if self.dtype == np.uint16:
+            x = convert_float_to_uint16(x)
+            y = convert_float_to_uint16(y)
+            out = convert_float_to_uint16(out)
+            grad_out = convert_float_to_uint16(grad_out)
+            grad_x = convert_float_to_uint16(grad_x)
+            grad_y = convert_float_to_uint16(grad_y)
+
+        self.inputs = {'X': x, 'Y': y}
+        self.outputs = {'Out': out}
+        self.grad_out = grad_out
+        self.grad_x = grad_x
+        self.grad_y = grad_y
+
+    def init_args(self):
+        self.check_dygraph = True
+        self.place = None
 
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def init_dtype(self):
+        self.dtype = np.float64
+        self.val_dtype = np.float64
 
-    def check_eager(self):
-        return (not hasattr(self, "attrs") or (self.attrs["axis"] != -1))
+    def init_shape(self):
+        self.x_shape = [13, 17]
+        self.y_shape = [13, 17]
 
-    def test_check_output(self):
-        self.check_output(check_eager=False)
+    def gen_data(self, shape):
+        return np.random.uniform(0.1, 1, shape)
 
-    def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05)
+    def compute_output(self, x, y):
+        return x / y
 
-    def test_check_grad_ingore_x(self):
-        self.check_grad(['Y'],
-                        'Out',
-                        max_relative_error=0.05,
-                        no_grad_set=set("X"))
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y
 
-    def test_check_grad_ingore_y(self):
-        self.check_grad(['X'],
-                        'Out',
-                        max_relative_error=0.05,
-                        no_grad_set=set('Y'))
+    def compute_gradient_y(self, grad_out, out, y):
+        return -1 * grad_out * out / y
 
-    def init_dtype(self):
-        pass
+    def test_check_output(self):
+        if self.place is None:
+            self.check_output()
+        else:
+            self.check_output_with_place(self.place)
+
+    def test_check_gradient(self):
+        check_list = []
+        check_list.append({
+            'grad': ['X', 'Y'],
+            'no_grad': None,
+            'val_grad': [self.grad_x, self.grad_y]
+        })
+        check_list.append({
+            'grad': ['Y'],
+            'no_grad': set('X'),
+            'val_grad': [self.grad_y]
+        })
+        check_list.append({
+            'grad': ['X'],
+            'no_grad': set('Y'),
+            'val_grad': [self.grad_x]
+        })
+        for check_option in check_list:
+            check_args = [check_option['grad'], 'Out']
+            check_kwargs = {
+                'no_grad_set': check_option['no_grad'],
+                'user_defined_grads': check_option['val_grad'],
+                'user_defined_grad_outputs': [self.grad_out],
+                'check_dygraph': self.check_dygraph
+            }
+            if self.place is None:
+                self.check_grad(*check_args, **check_kwargs)
+            else:
+                check_args.insert(0, self.place)
+                self.check_grad_with_place(*check_args, **check_kwargs)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda()
                  or not core.is_bfloat16_supported(core.CUDAPlace(0)),
-                 "core is not compiled with CUDA and not support the bfloat16")
-class TestElementwiseDivOpBF16(OpTest):
+                 "core is not compiled with CUDA or not support the bfloat16")
+class TestElementwiseDivOpBF16(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
+    def init_args(self):
+        # Due to the inconsistent output data type of the bfloat16 Paddle op, we disable the dygraph check.
+        self.check_dygraph = False
+        self.place = core.CUDAPlace(0)
+
+    def init_dtype(self):
         self.dtype = np.uint16
+        self.val_dtype = np.float32
 
-        x = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32)
-        y = np.random.uniform(0.1, 1, [12, 13]).astype(np.float32)
+    def init_shape(self):
+        self.x_shape = [12, 13]
+        self.y_shape = [12, 13]
 
-        out = np.divide(x, y)
 
-        self.inputs = {
-            'X': convert_float_to_uint16(x),
-            'Y': convert_float_to_uint16(y)
-        }
-        self.outputs = {'Out': convert_float_to_uint16(out)}
+@skip_check_grad_ci(
+    reason="[skip shape check] Use y_shape(1) to test broadcast.")
+class TestElementwiseDivOpScalar(ElementwiseDivOp):
 
-    def test_check_output(self):
-        place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+    def init_shape(self):
+        self.x_shape = [20, 3, 4]
+        self.y_shape = [1]
 
-    def test_check_grad_normal(self):
-        place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X', 'Y'], 'Out')
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.array([np.sum(-1 * grad_out * out / y)])
 
-    def test_check_grad_ingore_x(self):
-        place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['Y'], 'Out', no_grad_set=set("X"))
 
-    def test_check_grad_ingore_y(self):
-        place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out', no_grad_set=set('Y'))
+class TestElementwiseDivOpVector(ElementwiseDivOp):
 
+    def init_shape(self):
+        self.x_shape = [100]
+        self.y_shape = [100]
 
-@skip_check_grad_ci(
-    reason="[skip shape check] Use y_shape(1) to test broadcast.")
-class TestElementwiseDivOp_scalar(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(np.float64),
-            'Y': np.random.uniform(0.1, 1, [1]).astype(np.float64)
-        }
-        self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']}
+class TestElementwiseDivOpBroadcast0(ElementwiseDivOp):
 
+    def init_shape(self):
+        self.x_shape = [100, 3, 4]
+        self.y_shape = [100]
+        self.attrs = {'axis': 0}
 
-class TestElementwiseDivOp_Vector(ElementwiseDivOp):
+    def compute_output(self, x, y):
+        return x / y.reshape(100, 1, 1)
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [100]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [100]).astype("float64")
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y.reshape(100, 1, 1)
 
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y.reshape(100, 1, 1), axis=(1, 2))
 
-class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [100]).astype("float64")
-        }
+class TestElementwiseDivOpBroadcast1(ElementwiseDivOp):
 
-        self.attrs = {'axis': 0}
-        self.outputs = {
-            'Out': np.divide(self.inputs['X'],
-                             self.inputs['Y'].reshape(100, 1, 1))
-        }
+    def init_shape(self):
+        self.x_shape = [2, 100, 4]
+        self.y_shape = [100]
+        self.attrs = {'axis': 1}
 
+    def compute_output(self, x, y):
+        return x / y.reshape(1, 100, 1)
 
-class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp):
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y.reshape(1, 100, 1)
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 100, 4]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [100]).astype("float64")
-        }
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y.reshape(1, 100, 1), axis=(0, 2))
 
-        self.attrs = {'axis': 1}
-        self.outputs = {
-            'Out': np.divide(self.inputs['X'],
-                             self.inputs['Y'].reshape(1, 100, 1))
-        }
 
+class TestElementwiseDivOpBroadcast2(ElementwiseDivOp):
 
-class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp):
+    def init_shape(self):
+        self.x_shape = [2, 3, 100]
+        self.y_shape = [100]
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [100]).astype("float64")
-        }
+    def compute_output(self, x, y):
+        return x / y.reshape(1, 1, 100)
 
-        self.outputs = {
-            'Out': np.divide(self.inputs['X'],
-                             self.inputs['Y'].reshape(1, 1, 100))
-        }
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y.reshape(1, 1, 100)
 
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y.reshape(1, 1, 100), axis=(0, 1))
 
-class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [10, 12]).astype("float64")
-        }
+class TestElementwiseDivOpBroadcast3(ElementwiseDivOp):
 
+    def init_shape(self):
+        self.x_shape = [2, 10, 12, 5]
+        self.y_shape = [10, 12]
         self.attrs = {'axis': 1}
-        self.outputs = {
-            'Out':
-            np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 10, 12, 1))
-        }
 
+    def compute_output(self, x, y):
+        return x / y.reshape(1, 10, 12, 1)
 
-class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp):
+    def compute_gradient_x(self, grad_out, y):
+        return grad_out / y.reshape(1, 10, 12, 1)
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [2, 1, 50]).astype("float64")
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y.reshape(1, 10, 12, 1),
+                      axis=(0, 3))
 
 
-class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp):
+class TestElementwiseDivOpBroadcast4(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype("float64")
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def init_shape(self):
+        self.x_shape = [2, 3, 50]
+        self.y_shape = [2, 1, 50]
 
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y, axis=(1)).reshape(2, 1, 50)
 
-class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [1, 1, 100]).astype("float64"),
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+class TestElementwiseDivOpBroadcast5(ElementwiseDivOp):
 
+    def init_shape(self):
+        self.x_shape = [2, 3, 4, 20]
+        self.y_shape = [2, 3, 1, 20]
 
-class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp):
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y, axis=(2)).reshape(2, 3, 1, 20)
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype("float64"),
-        }
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
 
+class TestElementwiseDivOpCommonuse1(ElementwiseDivOp):
 
-class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp):
+    def init_shape(self):
+        self.x_shape = [2, 3, 100]
+        self.y_shape = [1, 1, 100]
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
-        self.inputs = {
-            'X': np.random.uniform(0.1, 1, [10, 12]).astype("float64"),
-            'Y': np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype("float64"),
-        }
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y, axis=(0, 1)).reshape(1, 1, 100)
+
+
+class TestElementwiseDivOpCommonuse2(ElementwiseDivOp):
+
+    def init_shape(self):
+        self.x_shape = [30, 3, 1, 5]
+        self.y_shape = [30, 1, 4, 1]
+
+    def compute_gradient_x(self, grad_out, y):
+        return np.sum(grad_out / y, axis=(2)).reshape(30, 3, 1, 5)
 
+    def compute_gradient_y(self, grad_out, out, y):
+        return np.sum(-1 * grad_out * out / y, axis=(1, 3)).reshape(30, 1, 4, 1)
+
+
+class TestElementwiseDivOpXsizeLessThanYsize(ElementwiseDivOp):
+
+    def init_shape(self):
+        self.x_shape = [10, 12]
+        self.y_shape = [2, 3, 10, 12]
         self.attrs = {'axis': 2}
 
-        self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
+    def compute_gradient_x(self, grad_out, y):
+        return np.sum(grad_out / y, axis=(0, 1))
 
 
-class TestElementwiseDivOp_INT(OpTest):
+class TestElementwiseDivOpInt(ElementwiseDivOp):
 
-    def setUp(self):
-        self.op_type = "elementwise_div"
-        self.python_api = paddle.divide
+    def init_dtype(self):
         self.dtype = np.int32
-        self.init_dtype()
-        self.inputs = {
-            'X': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype),
-            'Y': np.random.randint(1, 5, size=[13, 17]).astype(self.dtype)
-        }
-        self.outputs = {'Out': self.inputs['X'] // self.inputs['Y']}
+        self.val_dtype = np.int32
 
-    def test_check_output(self):
-        self.check_output()
+    def gen_data(self, shape):
+        return np.random.randint(1, 5, size=shape)
 
-    def init_dtype(self):
-        pass
+    def compute_output(self, x, y):
+        return x // y
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
@@ -285,21 +294,7 @@ class TestElementwiseDivOpFp16(ElementwiseDivOp):
 
     def init_dtype(self):
         self.dtype = np.float16
-
-    def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1)
-
-    def test_check_grad_ingore_x(self):
-        self.check_grad(['Y'],
-                        'Out',
-                        max_relative_error=1,
-                        no_grad_set=set("X"))
-
-    def test_check_grad_ingore_y(self):
-        self.check_grad(['X'],
-                        'Out',
-                        max_relative_error=1,
-                        no_grad_set=set('Y'))
+        self.val_dtype = np.float16
 
 
 class TestElementwiseDivBroadcast(unittest.TestCase):
-- 
GitLab