Commit acb90787 authored by chengduoZH

refine unit test

Parent 263e0197
@@ -21,29 +21,19 @@ from paddle.v2.fluid.op import Operator
 from paddle.v2.fluid.framework import grad_var_name
 
-def get_backward_op(scope, op, no_grad_set):
-    backward_op = core.Operator.backward(op, no_grad_set)
-    for input in backward_op.input_vars():
-        var = scope.var(input)
-        var.get_tensor()
-    for output in backward_op.output_vars():
-        var = scope.var(output)
-        var.get_tensor()
-    return backward_op
-
 
 def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1):
-    old_shape = x.shape
-    N = reduce(mul, old_shape[0:begin_norm_axis], 1)
-    D = reduce(mul, old_shape[begin_norm_axis:len(old_shape)], 1)
+    x_shape = x.shape
+    N = reduce(mul, x_shape[0:begin_norm_axis], 1)
+    D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
     x.shape = [N, D]
 
     mean = np.mean(x, axis=1)
     var = np.var(x, axis=1) + epsilon
     output = scale.reshape([1, D]) * np.divide(
         (x - mean.reshape([N, 1])),
         (np.sqrt(var)).reshape([N, 1])) + beta.reshape([1, D])
-    output.shape = old_shape
-    x.shape = old_shape
+
+    x.shape, output.shape = x_shape, x_shape
     return output, mean, var
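The forward reference above flattens the input to [N, D], normalizes each row, and then applies the per-feature scale and bias. A minimal cross-check sketch against a direct NumPy computation (this is commentary, not part of the diff; it assumes the helper above is in scope, and the 2x3x4x5 shape with begin_norm_axis=2 is an arbitrary illustrative choice):

    import numpy as np

    x = np.random.random_sample([2, 3, 4, 5]).astype(np.float32)
    D = 4 * 5                                  # product of the normalized dims
    scale = np.random.random_sample([D]).astype(np.float32)
    beta = np.random.random_sample([D]).astype(np.float32)
    epsilon = 1e-5

    y_ref, mean_ref, var_ref = _reference_layer_norm_naive(
        x.copy(), scale, beta, epsilon, begin_norm_axis=2)

    # Direct computation over the flattened [N, D] view.
    x2 = x.reshape([-1, D])
    mu = x2.mean(axis=1, keepdims=True)
    sigma2 = x2.var(axis=1) + epsilon          # matches the "var" returned above
    y_direct = (scale * (x2 - mu) / np.sqrt(sigma2)[:, None] + beta).reshape(x.shape)

    assert np.allclose(y_ref, y_direct, atol=1e-5)
    assert np.allclose(var_ref, sigma2)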
@@ -52,27 +42,25 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1):
     scale_shape = scale.shape
     N = reduce(mul, x_shape[0:begin_norm_axis], 1)
     D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
-    grad_y.shape = [N, D]
-    x.shape = [N, D]
-    mean.shape = [N, 1]
-    var.shape = [N, 1]
+    x.shape, grad_y.shape = [N, D], [N, D]
+    var.shape, mean.shape = [N, 1], [N, 1]
     scale.shape = [1, D]
 
     # d_bias
     d_bias = np.sum(grad_y, axis=0).reshape([1, D])
     # d_scale
     d_scale = np.sum(((x - mean) * np.sqrt(1 / var)) * grad_y,
                      axis=0).reshape([1, D])
     # dx
     dx_end = scale * np.sqrt(1.0 / var) * grad_y
     d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape(
         [N, 1])
     # d_mean_1 = np.sum(-1.0 / var * (x - mean) * grad_y, axis=1).reshape(
     #     [N, 1]) * (-1.0 / D * np.sqrt(1.0 / var) *
     #     np.sum(x - mean, axis=1).reshape([N, 1])).reshape([N, 1])
     d_mean = 1.0 / D * d_mean_0
     d_std = np.sum(
-        -1.0 / var * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * (
+        -(1.0 / var) * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * (
             1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean))
 
     grad_x = dx_end + d_mean + d_std
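The three terms assembled here (dx_end, d_mean, d_std) are the chain-rule pieces of dL/dX for L = sum(grad_y * Y): the direct path through the normalized value, the path through the row mean, and the path through the row standard deviation. A quick finite-difference sanity check of one coordinate (a sketch only, not part of the test; it assumes both reference helpers above are in scope, uses float64 inputs, and eps_fd is a hypothetical step size):

    import numpy as np

    np.random.seed(0)
    N, D = 6, 20
    x = np.random.random_sample([N, D])        # float64 for a stable check
    scale = np.random.random_sample([D])
    bias = np.random.random_sample([D])
    g = np.random.random_sample([N, D])        # upstream gradient dL/dY
    epsilon, eps_fd = 1e-5, 1e-6

    _, mean, var = _reference_layer_norm_naive(x.copy(), scale, bias, epsilon)
    grad_x, _, _ = _reference_layer_norm_grad(
        x.copy(), g.copy(), scale.copy(), mean.copy(), var.copy())

    def loss(x_in):
        y, _, _ = _reference_layer_norm_naive(x_in, scale, bias, epsilon)
        return (g * y).sum()                   # L = sum(g * Y), so dL/dX should equal grad_x

    i, j = 2, 7
    x_p, x_m = x.copy(), x.copy()
    x_p[i, j] += eps_fd
    x_m[i, j] -= eps_fd
    numeric = (loss(x_p) - loss(x_m)) / (2 * eps_fd)
    assert np.isclose(numeric, grad_x[i, j], rtol=1e-4, atol=1e-8)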
@@ -83,6 +71,17 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1):
     return grad_x, d_scale, d_bias
 
 
+def get_backward_op(scope, op, no_grad_set):
+    backward_op = core.Operator.backward(op, no_grad_set)
+    for input in backward_op.input_vars():
+        var = scope.var(input)
+        var.get_tensor()
+    for output in backward_op.output_vars():
+        var = scope.var(output)
+        var.get_tensor()
+    return backward_op
+
+
 def create_or_get_tensor(scope, var_name, var, place):
     tensor = scope.var(var_name).get_tensor()
     if var is not None:
@@ -145,8 +144,9 @@ class TestLayerNormdOp(OpTest):
         self.assertLessEqual(max_diff, max_relative_error, err_msg())
 
-    def test_forward_backward(self):
+    def check_forward_backward(self, shape, begin_norm_axis):
         def test_with_place(place, shape, begin_norm_axis=1):
+            # setUp
             assert begin_norm_axis > 0 and begin_norm_axis < len(
                 shape), 'begin_norm_axis must be between 0 and len(shape)-1.'
 
             # attr
@@ -158,30 +158,35 @@ class TestLayerNormdOp(OpTest):
             x_val = np.random.random_sample(x_shape).astype(np.float32)
             scale_val = np.random.random_sample(scale_shape).astype(np.float32)
             bias_val = np.random.random_sample(scale_shape).astype(np.float32)
+            y_grad = np.random.random_sample(x_shape).astype(np.float32)
 
             # run forward
             y_out, saved_mean, var_ref = _reference_layer_norm_naive(
                 x_val, scale_val, bias_val, epsilon, begin_norm_axis)
+            naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref}
 
-            # for gradient test
-            y_grad = np.random.random_sample(x_shape).astype(np.float32)
+            # get gradient
             x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad(
                 x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis)
+            naive_grad = {
+                "X": x_grad_ref,
+                "Scale": scale_grad_ref,
+                "Bias": bias_grad_ref
+            }
 
             scope = core.Scope()
 
             # create input
-            x_tensor = create_or_get_tensor(scope, "X", x_val, place)
-            scale_tensor = create_or_get_tensor(scope, "Scale", scale_val,
-                                                place)
-            bias_tensor = create_or_get_tensor(scope, "Bias", bias_val, place)
+            input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val}
+            for i_name in input_map:
+                create_or_get_tensor(scope, i_name, input_map[i_name], place)
 
             # create output
-            y_tensor = create_or_get_tensor(scope, "Y", None, place)
-            mean_tensor = create_or_get_tensor(scope, "Mean", None, place)
-            variance_tensor = create_or_get_tensor(scope, "Variance", None,
-                                                   place)
+            output_map = {"Y": None, "Mean": None, "Variance": None}
+            output_tensor = {}
+            for o_name in output_map:
+                output_tensor[o_name] = create_or_get_tensor(
+                    scope, o_name, output_map[o_name], place)
 
             layer_norm_op = Operator(
                 "layer_norm",
@@ -200,13 +205,10 @@ class TestLayerNormdOp(OpTest):
             layer_norm_op.run(scope, place)
 
             # check forward result
-            if isinstance(place, core.CUDAPlace):
-                atol = 5e-2
-            else:
-                atol = 1e-4
-            self.__assert_close(y_tensor, y_out, "Y", atol)
-            self.__assert_close(mean_tensor, saved_mean, "Mean", atol)
-            self.__assert_close(variance_tensor, var_ref, "Variance", atol)
+            atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4
+            for o_tensor in output_tensor:
+                self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor],
+                                    o_tensor, atol)
 
             # run backward
             layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set())
@@ -216,30 +218,28 @@ class TestLayerNormdOp(OpTest):
                                 feed_dict={"Y": y_grad})
             layer_norm_op_grad.run(scope, place)
 
-            x_grad_tensor = create_or_get_tensor(scope,
-                                                 grad_var_name("X"), None,
-                                                 place)
-            scale_grad_tensor = create_or_get_tensor(scope,
-                                                     grad_var_name("Scale"),
-                                                     None, place)
-            bias_grad_tensor = create_or_get_tensor(scope,
-                                                    grad_var_name("Bias"), None,
-                                                    place)
+            # get output
+            grad_tensor = {}
+            for o_name in naive_grad:
+                grad_tensor[o_name] = x_ = create_or_get_tensor(
+                    scope, grad_var_name(o_name), None, place)
 
             # check gradient output
-            self.__assert_grad_close(x_grad_tensor, x_grad_ref, "x_grad", place)
-            self.__assert_grad_close(scale_grad_tensor, scale_grad_ref,
-                                     "scale_grad", place)
-            self.__assert_grad_close(bias_grad_tensor, bias_grad_ref,
-                                     "bias_grad", place)
+            for o_grad in naive_grad:
+                self.__assert_grad_close(grad_tensor[o_grad],
+                                         naive_grad[o_grad], o_grad + "@GRAD",
+                                         place)
 
         places = [core.CPUPlace()]
         if core.is_compile_gpu() and core.op_support_gpu("layer_norm"):
             places.append(core.CUDAPlace(0))
         for place in places:
-            test_with_place(place, [2, 3, 4, 5], begin_norm_axis=1)
-            test_with_place(place, [2, 3, 4, 5], begin_norm_axis=3)
+            test_with_place(place, shape, begin_norm_axis)
+
+    def test_check_forward_backward(self):
+        self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1)
+        self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3)
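The two configurations exercised here differ only in how much of the [2, 3, 4, 5] shape is normalized. Computing N and D the same way the reference helpers do (a small illustrative snippet, not part of the test):

    from functools import reduce
    from operator import mul

    shape = [2, 3, 4, 5]
    for begin_norm_axis in (1, 3):
        N = reduce(mul, shape[:begin_norm_axis], 1)
        D = reduce(mul, shape[begin_norm_axis:], 1)
        print(begin_norm_axis, N, D)   # begin_norm_axis=1 -> N=2, D=60; begin_norm_axis=3 -> N=24, D=5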
 if __name__ == '__main__':
...