diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
index 33c65d00e0992a7ef56f94960179ea4b7256518d..a5673e1e186d3d41e05427ad9aefad5e5fca8571 100644
--- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
@@ -71,7 +71,7 @@ def hsigmoid(x, w, label, bias, num_classes):
     code_table = [0 for _ in range(code_length)]
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
     for i in range(batch_size):
         code_table = CodeTable(num_classes, label[i])
         length = code_table.get_length()
@@ -102,6 +102,30 @@ def hsigmoid(x, w, label, bias, num_classes):
     return pre_output, out
 
 
+def hsigmoid_grad(x, w, label, bias, num_classes):
+    batch_size = x.shape[0]
+    dx = np.zeros(x.shape)
+    dw = np.zeros(w.shape)
+    db = np.zeros(bias.shape)
+    for i in range(batch_size):
+        code_table = CodeTable(num_classes, label[i])
+        length = code_table.get_length()
+        for j in range(length):
+            idx = code_table.cal_index(j)
+            t = 1 / (1 + np.exp(-(np.dot(w[idx], x[i]) + bias[idx])))
+            dx[i] = dx[i] + t * w[idx]
+            dw[idx] += t * x[i]
+            db[idx] += t
+            if code_table.cal_bit(j):
+                dx[i] = dx[i] - w[idx]
+                dw[idx] -= x[i]
+                db[idx] -= 1
+    dx /= batch_size
+    dw /= batch_size
+    db /= batch_size
+    return [dx, dw, db]
+
+
 def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
                            num_classes):
     batch_size = x.shape[0]
@@ -110,7 +134,7 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
     # init pre_out with shape [N, code_length]
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
     if isinstance(bias, np.ndarray):
         for i in range(batch_size):
             code_table = CodeTableWithCustomTree(path_table, path_code, i)
@@ -145,28 +169,29 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
 class TestHSigmoidOp(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
-        num_classes = 6
-        feature_size = 8
-        batch_size = 15
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        num_classes = 101
+        feature_size = 5
+        batch_size = 20
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.random.randint(0, num_classes, (batch_size, 1))
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.uniform(-1, 1, (num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': False}
         self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
         pre_output, out = hsigmoid(x, w, label, bias, num_classes)
         self.outputs = {'PreOut': pre_output, 'Out': out}
+        self.user_grads = hsigmoid_grad(x, w, label, bias, num_classes)
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
+        self.check_grad(
+            ['X', 'W', 'Bias'], ['Out'], user_defined_grads=self.user_grads)
 
 
 @skip_check_grad_ci(
-    reason="For 'TestHSigmoidOpSparse', check_grad is 'TestHSigmoidOpWithSparseGrad'."
+    reason="For 'TestHSigmoidOpSparse', check_grad is separately calculated by 'TestHSigmoidOpWithSparseGrad'."
 )
 class TestHSigmoidOpSparse(OpTest):
     def setUp(self):
@@ -174,8 +199,8 @@ class TestHSigmoidOpSparse(OpTest):
         num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32")
-        w = np.random.random((num_classes - 1, feature_size)).astype("float32")
+        x = np.random.random((batch_size, feature_size))
+        w = np.random.random((num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -183,7 +208,7 @@ class TestHSigmoidOpSparse(OpTest):
              -1)])  #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
         path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
             1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  #np.array to store
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': True}
         self.inputs = {
             'X': x,
@@ -269,15 +294,17 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
         assert (dense_result == sparse_result)
 
 
+@skip_check_grad_ci(
+    reason="[skip shape check] The Huffman tree is constructed separately, so checking with a large shape would be overly complicated."
+)
 class TestHSigmoidOpWithCostumTree(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
         num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -285,7 +312,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
              -1)])  #np.array to store 1,2,5,6s' non-leaf path(root -> leaf)
         path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
             1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  #np.array to store
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': False}
         self.inputs = {
             'X': x,
@@ -306,15 +333,17 @@ class TestHSigmoidOpWithCostumTree(OpTest):
         self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
 
 
+@skip_check_grad_ci(
+    reason="[skip shape check] The Huffman tree is constructed separately, so checking with a large shape would be overly complicated."
+)
 class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
         num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
diff --git a/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py b/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py
index 2a2f7c84fb73c05e111990f4af0db69f061526b3..a43cd8f80dde889b3c338d58809c7b16334428d0 100644
--- a/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/check_shape_white_list.py
@@ -20,7 +20,6 @@ NEED_TO_FIX_OP_LIST = [
     'conv2d_transpose',
     'depthwise_conv2d_transpose',
     'grid_sampler',
-    'hierarchical_sigmoid',
     'lstmp',
     'margin_rank_loss',
     'matmul',
diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
index e5d65db236c05b43df45caf809b75c1f4dff2d57..44e0c7b5e9008b5125f374bf076b5c8f57c8de73 100644
--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -32,7 +32,6 @@ NO_FP64_CHECK_GRAD_OP_LIST = [
    'depthwise_conv2d_transpose', \
    'dropout', \
    'fused_elemwise_activation', \
-    'hierarchical_sigmoid', \
    'hinge_loss', \
    'huber_loss', \
    'im2sequence', \
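
Reviewer note on the new `hsigmoid_grad`: for each node on a label's path it computes `t = sigmoid(z)` with `z = np.dot(w[idx], x[i]) + bias[idx]` and accumulates `(t - bit)` times the relevant input, i.e. the derivative of the per-node sigmoid cross-entropy term `softplus(z) - bit * z` that the forward `hsigmoid` builds `Out` from; the trailing `/= batch_size` lines account for the numeric gradient being taken on the mean of `Out`. A minimal standalone numpy sketch, not part of the patch (`node_loss` and `node_grad` are illustrative names), checking that identity against central finite differences:

```python
import numpy as np


def node_loss(z, bit):
    # Per-node term of the hsigmoid loss: softplus(z) - bit * z.
    return np.log(1 + np.exp(z)) - bit * z


def node_grad(z, bit):
    # Analytic derivative used by hsigmoid_grad: sigmoid(z) - bit.
    return 1 / (1 + np.exp(-z)) - bit


eps = 1e-6
for bit in (0, 1):
    for z in np.linspace(-3.0, 3.0, 7):
        # Central-difference approximation of d(node_loss)/dz.
        numeric = (node_loss(z + eps, bit) - node_loss(z - eps, bit)) / (2 * eps)
        assert abs(numeric - node_grad(z, bit)) < 1e-8, (z, bit)
print("sigmoid(z) - bit matches the finite-difference gradient")
```

With exact analytic gradients supplied through `user_defined_grads`, the gradient check no longer depends on the shape and FP64 exemptions, which is why the `hierarchical_sigmoid` entries are removed from `check_shape_white_list.py` and `op_accuracy_white_list.py` above.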