upgrade fp32 to fp64, enlarge input shape for hsigmoid unittest (#22773)

* upgrade fp32 to fp64, remove no_grad_set, enlarge input shape for hsigmoid unittest test=develop
* revert "remove no_grad_set" test=develop

upgrade fp32 to fp64, enlarge input shape for hsigmoid unittest (#22773)
* upgrade fp32 to fp64, remove no_grad_set, enlarge input shape for hsigmoid unittest test=develop
* revert "remove no_grad_set" test=develop
cde53789 · zhupengyang · GitHub · 7d8d5734 · cde53789 · cde53789
3 changed files
--- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
@@ -71,7 +71,7 @@ def hsigmoid(x, w, label, bias, num_classes):
    code_table = [0 for _ in range(code_length)]
    pre_output = np.zeros((batch_size, code_length))
    pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
    for i in range(batch_size):
        code_table = CodeTable(num_classes, label[i])
        length = code_table.get_length()
@@ -102,6 +102,30 @@ def hsigmoid(x, w, label, bias, num_classes):
    return pre_output, out


+def hsigmoid_grad(x, w, label, bias, num_classes):
+    batch_size = x.shape[0]
+    dx = np.zeros(x.shape)
+    dw = np.zeros(w.shape)
+    db = np.zeros(bias.shape)
+    for i in range(batch_size):
+        code_table = CodeTable(num_classes, label[i])
+        length = code_table.get_length()
+        for j in range(length):
+            idx = code_table.cal_index(j)
+            t = 1 / (1 + np.exp(-(np.dot(w[idx], x[i]) + bias[idx])))
+            dx[i] = dx[i] + t * w[idx]
+            dw[idx] += t * x[i]
+            db[idx] += t
+            if code_table.cal_bit(j):
+                dx[i] = dx[i] - w[idx]
+                dw[idx] -= x[i]
+                db[idx] -= 1
+    dx /= batch_size
+    dw /= batch_size
+    db /= batch_size
+    return [dx, dw, db]
+
+
 def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
                           num_classes):
    batch_size = x.shape[0]
@@ -110,7 +134,7 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
    # init pre_out with shape [N, code_length]
    pre_output = np.zeros((batch_size, code_length))
    pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
    if isinstance(bias, np.ndarray):
        for i in range(batch_size):
            code_table = CodeTableWithCustomTree(path_table, path_code, i)
@@ -145,28 +169,30 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
 class TestHSigmoidOp(OpTest):
    def setUp(self):
        self.op_type = "hierarchical_sigmoid"
-        num_classes = 6
-        feature_size = 8
-        batch_size = 15
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        num_classes = 101
+        feature_size = 5
+        batch_size = 20
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
        label = np.random.randint(0, num_classes, (batch_size, 1))
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.uniform(-1, 1, (num_classes - 1, 1))
        self.attrs = {'num_classes': num_classes, 'is_sparse': False}
        self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
        pre_output, out = hsigmoid(x, w, label, bias, num_classes)
        self.outputs = {'PreOut': pre_output, 'Out': out}
+        self.user_grads = hsigmoid_grad(x, w, label, bias, num_classes)

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
-        self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
+        self.check_grad(
+            ['X', 'W', 'Bias'], ['Out'], user_defined_grads=self.user_grads)
+        #self.check_grad(['X', 'W', 'Bias'], ['Out'])


 @skip_check_grad_ci(
-    reason="For 'TestHSigmoidOpSparse', check_grad is 'TestHSigmoidOpWithSparseGrad'."
+    reason="For 'TestHSigmoidOpSparse', check_grad is is separately calculated by 'TestHSigmoidOpWithSparseGrad'."
 )
 class TestHSigmoidOpSparse(OpTest):
    def setUp(self):
@@ -174,8 +200,8 @@ class TestHSigmoidOpSparse(OpTest):
        num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
        feature_size = 8
        batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32")
-        w = np.random.random((num_classes - 1, feature_size)).astype("float32")
+        x = np.random.random((batch_size, feature_size))
+        w = np.random.random((num_classes - 1, feature_size))
        label = np.array([0, 1, 4, 5])
        path_table = np.array(
            [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -183,7 +209,7 @@ class TestHSigmoidOpSparse(OpTest):
              -1)])  #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
        path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
            1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  #np.array to store 
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
        self.attrs = {'num_classes': num_classes, 'is_sparse': True}
        self.inputs = {
            'X': x,
@@ -269,15 +295,17 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
        assert (dense_result == sparse_result)


+@skip_check_grad_ci(
+    reason="[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape."
+)
 class TestHSigmoidOpWithCostumTree(OpTest):
    def setUp(self):
        self.op_type = "hierarchical_sigmoid"
        num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
        feature_size = 8
        batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
        label = np.array([0, 1, 4, 5])
        path_table = np.array(
            [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -285,7 +313,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
              -1)])  #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
        path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
            1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  #np.array to store 
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
        self.attrs = {'num_classes': num_classes, 'is_sparse': False}
        self.inputs = {
            'X': x,
@@ -306,15 +334,17 @@ class TestHSigmoidOpWithCostumTree(OpTest):
        self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))


+@skip_check_grad_ci(
+    reason="[skip shape check] The huffman tree is structed separately. It will be complicated if use large shape."
+)
 class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
    def setUp(self):
        self.op_type = "hierarchical_sigmoid"
        num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
        feature_size = 8
        batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
        label = np.array([0, 1, 4, 5])
        path_table = np.array(
            [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),

--- a/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py
@@ -20,7 +20,6 @@ NEED_TO_FIX_OP_LIST = [
    'conv2d_transpose',
    'depthwise_conv2d_transpose',
    'grid_sampler',
-    'hierarchical_sigmoid',
    'lstmp',
    'margin_rank_loss',
    'matmul',

--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -32,7 +32,6 @@ NO_FP64_CHECK_GRAD_OP_LIST = [
    'depthwise_conv2d_transpose', \
    'dropout', \
    'fused_elemwise_activation', \
-    'hierarchical_sigmoid', \
    'hinge_loss', \
    'huber_loss', \
    'im2sequence', \