Unverified commit cde53789, authored by zhupengyang, committed by GitHub

upgrade fp32 to fp64, enlarge input shape for hsigmoid unittest (#22773)

* upgrade fp32 to fp64, remove no_grad_set, enlarge input shape for hsigmoid unittest test=develop

* revert "remove no_grad_set" test=develop
Parent: 7d8d5734
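Context for the dtype upgrade: OpTest verifies gradients with finite differences, and the accuracy such a check can reach is limited by the precision of the inputs, so fp64 permits much tighter tolerances than fp32. A minimal standalone sketch of the gap (NumPy only; `sigmoid_fd_error` is a hypothetical helper, not part of this commit):

import numpy as np

def sigmoid_fd_error(dtype):
    # Relative error of a central-difference derivative of sigmoid at
    # x = 0.5 versus the analytic derivative, at the given precision.
    s = lambda v: 1 / (1 + np.exp(-v))
    x = dtype(0.5)
    eps = dtype(np.sqrt(np.finfo(dtype).eps))  # canonical step size
    numeric = (s(x + eps) - s(x - eps)) / (2 * eps)
    analytic = s(x) * (1 - s(x))
    return abs(numeric - analytic) / abs(analytic)

print(sigmoid_fd_error(np.float32))  # on the order of 1e-4
print(sigmoid_fd_error(np.float64))  # on the order of 1e-8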
@@ -71,7 +71,7 @@ def hsigmoid(x, w, label, bias, num_classes):
     code_table = [0 for _ in range(code_length)]
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
     for i in range(batch_size):
         code_table = CodeTable(num_classes, label[i])
         length = code_table.get_length()
@@ -102,6 +102,30 @@ def hsigmoid(x, w, label, bias, num_classes):
     return pre_output, out
 
 
+def hsigmoid_grad(x, w, label, bias, num_classes):
+    batch_size = x.shape[0]
+    dx = np.zeros(x.shape)
+    dw = np.zeros(w.shape)
+    db = np.zeros(bias.shape)
+    for i in range(batch_size):
+        code_table = CodeTable(num_classes, label[i])
+        length = code_table.get_length()
+        for j in range(length):
+            idx = code_table.cal_index(j)
+            t = 1 / (1 + np.exp(-(np.dot(w[idx], x[i]) + bias[idx])))
+            dx[i] = dx[i] + t * w[idx]
+            dw[idx] += t * x[i]
+            db[idx] += t
+            if code_table.cal_bit(j):
+                dx[i] = dx[i] - w[idx]
+                dw[idx] -= x[i]
+                db[idx] -= 1
+    dx /= batch_size
+    dw /= batch_size
+    db /= batch_size
+    return [dx, dw, db]
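In the added hsigmoid_grad, each visited node contributes t = sigmoid(z) with z = w[idx]·x[i] + bias[idx] to every gradient and subtracts the full term when the code bit is set, i.e. the per-node factor is sigmoid(z) - bit; the final division by batch_size means the arrays are gradients of the batch mean of Out. A quick cross-check against the hsigmoid reference, assuming the helpers defined earlier in this file are in scope (a sketch, not part of the commit):

import numpy as np

np.random.seed(0)
num_classes, feature_size, batch_size = 101, 5, 20
x = np.random.uniform(-1, 1, (batch_size, feature_size))
w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
label = np.random.randint(0, num_classes, (batch_size, 1))
bias = np.random.uniform(-1, 1, (num_classes - 1, 1))

dx, dw, db = hsigmoid_grad(x, w, label, bias, num_classes)

# hsigmoid_grad differentiates the batch mean of Out, so the central
# difference is taken on out.mean() as well, probing a single entry of x.
eps = 1e-6
xp, xm = x.copy(), x.copy()
xp[0, 0] += eps
xm[0, 0] -= eps
_, out_p = hsigmoid(xp, w, label, bias, num_classes)
_, out_m = hsigmoid(xm, w, label, bias, num_classes)
fd = (out_p.mean() - out_m.mean()) / (2 * eps)
assert np.isclose(fd, dx[0, 0], rtol=1e-4)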
+
+
 def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
                            num_classes):
     batch_size = x.shape[0]
@@ -110,7 +134,7 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
     # init pre_out with shape [N, code_length]
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
-    out = np.zeros((batch_size, 1)).astype("float32")
+    out = np.zeros((batch_size, 1))
     if isinstance(bias, np.ndarray):
         for i in range(batch_size):
             code_table = CodeTableWithCustomTree(path_table, path_code, i)
@@ -145,28 +169,30 @@ def hsigmoidWithCustomTree(x, w, path_table, path_code, label, bias,
 class TestHSigmoidOp(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
-        num_classes = 6
-        feature_size = 8
-        batch_size = 15
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        num_classes = 101
+        feature_size = 5
+        batch_size = 20
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.random.randint(0, num_classes, (batch_size, 1))
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.uniform(-1, 1, (num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': False}
         self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
         pre_output, out = hsigmoid(x, w, label, bias, num_classes)
         self.outputs = {'PreOut': pre_output, 'Out': out}
+        self.user_grads = hsigmoid_grad(x, w, label, bias, num_classes)
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
+        self.check_grad(
+            ['X', 'W', 'Bias'], ['Out'], user_defined_grads=self.user_grads)
+        #self.check_grad(['X', 'W', 'Bias'], ['Out'])
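With user_defined_grads, check_grad takes these arrays as the reference baseline instead of deriving one by finite differences, and compares them with the gradients produced by the op's backward kernel. A simplified sketch of such an element-wise comparison (an assumption about the shape of the check, not the actual OpTest implementation; max_relative_error mirrors check_grad's parameter of the same name):

import numpy as np

def assert_grads_close(analytic, reference, max_relative_error=0.005):
    # Scale the difference by the reference magnitude; tiny reference
    # entries fall back to an absolute comparison to avoid blow-ups.
    denom = np.abs(reference).astype(np.float64)
    denom[denom < 1e-3] = 1.0
    rel = np.abs(analytic - reference) / denom
    assert rel.max() <= max_relative_error, f"max rel diff {rel.max():.3g}"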
 @skip_check_grad_ci(
-    reason="For 'TestHSigmoidOpSparse', check_grad is 'TestHSigmoidOpWithSparseGrad'."
+    reason="For 'TestHSigmoidOpSparse', check_grad is separately calculated by 'TestHSigmoidOpWithSparseGrad'."
 )
 class TestHSigmoidOpSparse(OpTest):
     def setUp(self):
@@ -174,8 +200,8 @@ class TestHSigmoidOpSparse(OpTest):
         num_classes = 6  # using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32")
-        w = np.random.random((num_classes - 1, feature_size)).astype("float32")
+        x = np.random.random((batch_size, feature_size))
+        w = np.random.random((num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -183,7 +209,7 @@ class TestHSigmoidOpSparse(OpTest):
              -1)])  # np.array to store 1,2,5,6s' non-leaf path (root -> leaf)
         path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
             1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  # np.array to store
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': True}
         self.inputs = {
             'X': x,
@@ -269,15 +295,17 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
         assert (dense_result == sparse_result)
 
 
+@skip_check_grad_ci(
+    reason="[skip shape check] The huffman tree is constructed separately. It would be complicated to use a large shape."
+)
 class TestHSigmoidOpWithCostumTree(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
         num_classes = 6  # using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
@@ -285,7 +313,7 @@ class TestHSigmoidOpWithCostumTree(OpTest):
              -1)])  # np.array to store 1,2,5,6s' non-leaf path (root -> leaf)
         path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), (
             1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])  # np.array to store
-        bias = np.random.random((num_classes - 1, 1)).astype("float32")
+        bias = np.random.random((num_classes - 1, 1))
         self.attrs = {'num_classes': num_classes, 'is_sparse': False}
         self.inputs = {
             'X': x,
@@ -306,15 +334,17 @@ class TestHSigmoidOpWithCostumTree(OpTest):
         self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
 
 
+@skip_check_grad_ci(
+    reason="[skip shape check] The huffman tree is constructed separately. It would be complicated to use a large shape."
+)
 class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest):
     def setUp(self):
         self.op_type = "hierarchical_sigmoid"
         num_classes = 6  # using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
-        w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 2
+        x = np.random.uniform(-1, 1, (batch_size, feature_size))
+        w = np.random.uniform(-1, 1, (num_classes - 1, feature_size))
         label = np.array([0, 1, 4, 5])
         path_table = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
......
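A note on the custom-tree inputs used in the hunks above: row i of path_table lists the ids of the non-leaf nodes visited from the root for sample i, padded with -1, and the matching row of path_code gives the 0/1 branch taken at each of those nodes (this follows the test file's own comments). A standalone sketch decoding the arrays from the tests:

import numpy as np

path_table = np.array([(0, 2, -1, -1, -1), (0, 1, 3, -1, -1),
                       (0, 1, 4, -1, -1), (0, 2, -1, -1, -1)])
path_code = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1),
                      (1, 0, 0, -1, -1), (0, 1, -1, -1, -1)])
labels = [0, 1, 4, 5]  # the four sampled classes

for i, lbl in enumerate(labels):
    nodes = path_table[i][path_table[i] >= 0]
    bits = path_code[i][path_code[i] >= 0]
    # e.g. class 0 is reached via non-leaf nodes [0, 2] with branches [0, 0]
    print(f"class {lbl}: nodes {nodes.tolist()}, codes {bits.tolist()}")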
@@ -20,7 +20,6 @@ NEED_TO_FIX_OP_LIST = [
     'conv2d_transpose',
     'depthwise_conv2d_transpose',
     'grid_sampler',
-    'hierarchical_sigmoid',
     'lstmp',
     'margin_rank_loss',
     'matmul',
......
@@ -32,7 +32,6 @@ NO_FP64_CHECK_GRAD_OP_LIST = [
     'depthwise_conv2d_transpose', \
     'dropout', \
     'fused_elemwise_activation', \
-    'hierarchical_sigmoid', \
     'hinge_loss', \
     'huber_loss', \
     'im2sequence', \
......