Unverified commit 43d5cca6, authored by H hong, committed by GitHub

Add layer norm yaml (#41589)

* add layer norm infermeta

* add layer norm yaml

* polish layer norm infer meta

* add layer norm to black list
Parent be4a2077
@@ -259,6 +259,103 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
  }
}

void LayerNormInferMeta(const MetaTensor& x,
paddle::optional<const MetaTensor&> scale,
paddle::optional<const MetaTensor&> bias,
float epsilon,
int begin_norm_axis,
bool is_test,
MetaTensor* out,
MetaTensor* mean,
MetaTensor* variance,
MetaConfig config) {
auto x_dim = x.dims();
PADDLE_ENFORCE_LT(
begin_norm_axis,
x_dim.size(),
phi::errors::InvalidArgument(
"'begin_norm_axis' must be less than the dimensions of X,"
"But received 'begin_norm_axis' is [%d],"
"received the dimensions of X is [%d].",
begin_norm_axis,
x_dim.size()));
auto matrix_dim = phi::flatten_to_2d(x_dim, begin_norm_axis);
int left = static_cast<int>(matrix_dim[0]);
int right = static_cast<int>(matrix_dim[1]);
if (scale.get_ptr() != nullptr) {
PADDLE_ENFORCE_EQ(scale->dims().size(),
1,
phi::errors::InvalidArgument(
"The dimensions of Input(Scale) must be 1, but "
"received dimensions of"
"Input(Scale) is [%d]",
scale->dims().size()));
}
if (config.is_runtime && scale.get_ptr() != nullptr) {
PADDLE_ENFORCE_EQ(
scale->dims()[0],
right,
phi::errors::InvalidArgument(
"The first dimension value of Input(Scale) must equal to be the"
"second dimension value of the flattened 2D matrix of Input(X),"
"But received the first dimension value of Input(Scale) is"
"[%d], the second dimension value of the flattened 2D matrix of"
" Input(Scale) is [%d].",
scale->dims()[0],
right));
}
if (bias.get_ptr() != nullptr) {
PADDLE_ENFORCE_EQ(bias->dims().size(),
1,
phi::errors::InvalidArgument(
"The dimensions of Input(Bias) must be 1, but "
"received dimensions of"
"Input(Bias) is [%d]",
bias->dims().size()));
}
if (config.is_runtime && bias.get_ptr() != nullptr) {
PADDLE_ENFORCE_EQ(
bias->dims()[0],
right,
phi::errors::InvalidArgument(
"The first dimension value of Input(Bias) must equal to be the"
"second dimension value of the flattened 2D matrix of Input(X),"
"But received the first dimension value of Input(Bias) is"
"[%d], the second dimension value of the flattened 2D matrix of"
" Input(Bias) is [%d].",
bias->dims()[0],
right));
}
out->set_dims(x_dim);
if (mean) {
mean->set_dims({left});
}
if (variance) {
variance->set_dims({left});
}
out->share_lod(x);
}
void LayerNormGradInferMeta(const MetaTensor& x,
paddle::optional<const MetaTensor&> y,
paddle::optional<const MetaTensor&> z,
MetaTensor* dx,
MetaTensor* dy,
MetaTensor* dz) {
if (dx) {
dx->share_meta(x);
}
if (dy && (y.get_ptr() != nullptr)) {
dy->share_meta(*y.get_ptr());
}
if (dz && (z.get_ptr() != nullptr)) {
dz->share_meta(*z.get_ptr());
}
}

void LerpInferMeta(const MetaTensor& x,
                   const MetaTensor& y,
                   const MetaTensor& weight,
                   ...
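Editorial note (not part of the patch): the shape contract enforced by LayerNormInferMeta above can be summarized with a small NumPy sketch. The input is flattened to a [left, right] matrix at begin_norm_axis; out keeps the shape of x, mean and variance get shape [left], and scale/bias, when supplied, must be 1-D with size right. The helper name below is purely illustrative.

# Illustration only: mirrors the shape rules checked by LayerNormInferMeta.
import numpy as np

def layer_norm_ref(x, scale=None, bias=None, epsilon=1e-5, begin_norm_axis=1):
    # Flatten x to 2-D [left, right], analogous to phi::flatten_to_2d.
    left = int(np.prod(x.shape[:begin_norm_axis]))
    right = int(np.prod(x.shape[begin_norm_axis:]))
    if scale is not None:
        assert scale.ndim == 1 and scale.shape[0] == right
    if bias is not None:
        assert bias.ndim == 1 and bias.shape[0] == right
    mat = x.reshape(left, right)
    mean = mat.mean(axis=1)            # shape [left]
    variance = mat.var(axis=1)         # shape [left]
    y = (mat - mean[:, None]) / np.sqrt(variance[:, None] + epsilon)
    if scale is not None:
        y = y * scale
    if bias is not None:
        y = y + bias
    return y.reshape(x.shape), mean, variance   # out keeps x's shape

x = np.random.randn(4, 10, 4, 4).astype("float32")
out, mean, var = layer_norm_ref(x, begin_norm_axis=1)
assert out.shape == x.shape and mean.shape == (4,) and var.shape == (4,)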
@@ -60,6 +60,24 @@ void GraphSendRecvInferMeta(const MetaTensor& x,
                            MetaTensor* out,
                            MetaTensor* dst_count);

void LayerNormInferMeta(const MetaTensor& x,
paddle::optional<const MetaTensor&> scale,
paddle::optional<const MetaTensor&> bias,
float epsilon,
int begin_norm_axis,
bool is_test,
MetaTensor* out,
MetaTensor* mean,
MetaTensor* variance,
MetaConfig config = MetaConfig());
void LayerNormGradInferMeta(const MetaTensor& x,
paddle::optional<const MetaTensor&> y,
paddle::optional<const MetaTensor&> z,
MetaTensor* dx,
MetaTensor* dy,
MetaTensor* dz);

void LerpInferMeta(const MetaTensor& x,
                   const MetaTensor& y,
                   const MetaTensor& weight,
                   ...
@@ -32,10 +32,10 @@ namespace phi {

template <typename T, typename Context>
void LayerNormGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
-                        const DenseTensor& mean,
-                        const DenseTensor& variance,
                         paddle::optional<const DenseTensor&> scale_opt,
                         paddle::optional<const DenseTensor&> bias_opt,
+                        const DenseTensor& mean,
+                        const DenseTensor& variance,
                         const DenseTensor& out_grad,
                         float epsilon,
                         int begin_norm_axis,
                         ...
@@ -24,10 +24,10 @@ namespace phi {

template <typename T, typename Context>
void LayerNormGradKernel(const Context &dev_ctx,
                         const DenseTensor &x,
-                        const DenseTensor &mean,
-                        const DenseTensor &variance,
                         paddle::optional<const DenseTensor &> scale_opt,
                         paddle::optional<const DenseTensor &> bias_opt,
+                        const DenseTensor &mean,
+                        const DenseTensor &variance,
                         const DenseTensor &out_grad,
                         float epsilon,
                         int begin_norm_axis,
                         ...
@@ -21,10 +21,10 @@ namespace phi {

template <typename T, typename Context>
void LayerNormGradKernel(const Context& ctx,
                         const DenseTensor& x,
-                        const DenseTensor& mean,
-                        const DenseTensor& variance,
                         paddle::optional<const DenseTensor&> scale,
                         paddle::optional<const DenseTensor&> bias,
+                        const DenseTensor& mean,
+                        const DenseTensor& variance,
                         const DenseTensor& out_grad,
                         float epsilon,
                         int begin_norm_axis,
                         ...
@@ -27,7 +27,7 @@ KernelSignature LayerNormGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  return KernelSignature(
      "layer_norm_grad",
-     {"X", "Mean", "Variance", "Scale", "Bias", GradVarName("Y")},
+     {"X", "Scale", "Bias", "Mean", "Variance", GradVarName("Y")},
      {"epsilon", "begin_norm_axis", "is_test"},
      {GradVarName("X"), GradVarName("Scale"), GradVarName("Bias")});
}
...
@@ -1827,11 +1827,18 @@ class LayerNorm(layers.Layer):
                1:] + ', but got input shape ' + str(input_shape))

        if _non_static_mode():
-           pre_act, _, _ = _C_ops.layer_norm(
-               input, self.weight, self.bias, 'epsilon', self._epsilon,
-               'begin_norm_axis', self._begin_norm_axis)
-           return dygraph_utils._append_activation_in_dygraph(
-               pre_act, act=self._act)
+           if in_dygraph_mode():
+               pre_act, _, _, = _C_ops.final_state_layer_norm(
+                   input, self.weight, self.bias, self._epsilon,
+                   self._begin_norm_axis, False)
+               return dygraph_utils._append_activation_in_dygraph(
+                   pre_act, act=self._act)
+           else:
+               pre_act, _, _ = _C_ops.layer_norm(
+                   input, self.weight, self.bias, 'epsilon', self._epsilon,
+                   'begin_norm_axis', self._begin_norm_axis)
+               return dygraph_utils._append_activation_in_dygraph(
+                   pre_act, act=self._act)

        check_variable_and_dtype(input, 'input', ['float32', 'float64'],
                                 'LayerNorm')
...
@@ -19,7 +19,7 @@ import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid as fluid
from op_test import OpTest, _set_use_system_allocator
-from paddle.fluid.framework import grad_var_name
+from paddle.fluid.framework import grad_var_name, _test_eager_guard
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle
@@ -36,13 +36,13 @@ class TestDygraphLayerNormv2(unittest.TestCase):

        def compute_v1(x):
            with fluid.dygraph.guard(p):
                ln = fluid.dygraph.LayerNorm(shape[1:])
-               y = ln(fluid.dygraph.to_variable(x))
+               y = ln(paddle.to_tensor(x))
                return y.numpy()

        def compute_v2(x):
            with fluid.dygraph.guard(p):
                ln = paddle.nn.LayerNorm(shape[1:])
-               y = ln(fluid.dygraph.to_variable(x))
+               y = ln(paddle.to_tensor(x))
                return y.numpy()

        x = np.random.randn(*shape).astype("float32")

@@ -50,6 +50,38 @@ class TestDygraphLayerNormv2(unittest.TestCase):
        y2 = compute_v2(x)
        self.assertTrue(np.allclose(y1, y2))

def test_eager(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"):
places.append(fluid.CUDAPlace(0))
for p in places:
shape = [4, 10, 4, 4]
def compute_v1(x):
with fluid.dygraph.guard(p):
ln = fluid.dygraph.LayerNorm(shape[1:])
x1 = paddle.to_tensor(x)
x1.stop_gradient = False
y = ln(x1)
y.backward()
return y.numpy(), x1.gradient()
def compute_v2(x):
with fluid.dygraph.guard(p):
with _test_eager_guard():
ln = paddle.nn.LayerNorm(shape[1:])
x1 = paddle.to_tensor(x)
x1.stop_gradient = False
y = ln(x1)
y.backward()
return y.numpy(), x1.gradient()
x = np.random.randn(*shape).astype("float32")
y1, g1 = compute_v1(x)
y2, g2 = compute_v2(x)
self.assertTrue(np.allclose(y1, y2))
self.assertTrue(np.allclose(g1, g2))

    def test_static(self):
        paddle.enable_static()
        places = [fluid.CPUPlace()]
@@ -94,30 +126,30 @@ class TestLayerNormFunction(unittest.TestCase):

        def compute_v0(x):
            with fluid.dygraph.guard(p):
                ln = fluid.dygraph.LayerNorm(shape[1:])
-               y = ln(fluid.dygraph.to_variable(x))
+               y = ln(paddle.to_tensor(x))
                return y.numpy()

        def compute_v1(x):
            with fluid.dygraph.guard(p):
-               x = fluid.dygraph.to_variable(x)
+               x = paddle.to_tensor(x)
                y = paddle.nn.functional.layer_norm(x, shape[1:])
                return y.numpy()

        def compute_v2(x):
            with fluid.dygraph.guard(p):
-               x = fluid.dygraph.to_variable(x)
+               x = paddle.to_tensor(x)
                y = paddle.nn.functional.layer_norm(x, tuple(shape[1:]))
                return y.numpy()

        def compute_v3(x):
            with fluid.dygraph.guard(p):
                ln = fluid.dygraph.LayerNorm(shape[-1])
-               y = ln(fluid.dygraph.to_variable(x))
+               y = ln(paddle.to_tensor(x))
                return y.numpy()

        def compute_v4(x):
            with fluid.dygraph.guard(p):
-               x = fluid.dygraph.to_variable(x)
+               x = paddle.to_tensor(x)
                y = paddle.nn.functional.layer_norm(x, shape[-1])
                return y.numpy()

@@ -139,4 +171,5 @@ class TestLayerNormFunction(unittest.TestCase):

if __name__ == '__main__':
+   paddle.enable_static()
    unittest.main()
@@ -318,7 +318,13 @@ def layer_norm(x,
            str_normalized_shape[
                1:] + ', but got input shape ' + str(input_shape))

-   if in_dynamic_mode():
+   if in_dygraph_mode():
+       pre_act, _, _, = _C_ops.final_state_layer_norm(x, weight, bias, epsilon,
+                                                      begin_norm_axis, False)
+       return dygraph_utils._append_activation_in_dygraph(pre_act, act=None)
+
+   if _in_legacy_dygraph():
        pre_act, _, _ = _C_ops.layer_norm(x, weight, bias, 'epsilon', epsilon,
                                          'begin_norm_axis', begin_norm_axis)
        return dygraph_utils._append_activation_in_dygraph(pre_act, act=None)
...
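The Python-facing functional API is unchanged by this dispatch split; a minimal usage sketch (assuming an eager-mode build, where the call above routes to _C_ops.final_state_layer_norm):

import paddle

x = paddle.randn([4, 10, 4, 4])
# weight and bias default to None, matching the `optional : scale, bias` YAML entry below.
y = paddle.nn.functional.layer_norm(x, x.shape[1:])
print(y.shape)  # [4, 10, 4, 4]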
@@ -1002,6 +1002,16 @@
  optional : prior_dist
  backward : label_smooth_grad

- api : layer_norm
  args : (Tensor x, Tensor scale, Tensor bias, float epsilon, int begin_norm_axis, bool is_test)
  output : Tensor(out), Tensor(mean), Tensor(variance)
  infer_meta :
    func : LayerNormInferMeta
  kernel :
    func : layer_norm
  backward : layer_norm_grad
  optional : scale, bias

# leaky_relu
- api : leaky_relu
  args : (Tensor x, float alpha)
  ...
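For reference, the computation this entry exposes is standard layer normalization over the axes at and after begin_norm_axis; writing H for the size of one flattened row,

\mu_i = \frac{1}{H}\sum_{j=1}^{H} x_{ij}, \qquad
\sigma_i^2 = \frac{1}{H}\sum_{j=1}^{H} \left(x_{ij} - \mu_i\right)^2, \qquad
\mathrm{out}_{ij} = \gamma_j\,\frac{x_{ij} - \mu_i}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_j

where gamma (scale) and beta (bias) are the optional inputs and the mean/variance outputs hold \mu and \sigma^2.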
@@ -723,6 +723,18 @@
    func : label_smooth_grad
  optional : prior_dist

- backward_api : layer_norm_grad
  forward : layer_norm (Tensor x, Tensor scale, Tensor bias, float epsilon, int begin_norm_axis, bool is_test) -> Tensor(out), Tensor(mean), Tensor(variance)
  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, Tensor out_grad, float epsilon, int begin_norm_axis, bool is_test)
  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
  infer_meta :
    func : LayerNormGradInferMeta
    param : [x, scale, bias]
  kernel :
    func : layer_norm_grad
    data_type : out_grad
  optional : scale, bias

- backward_api : leaky_relu_grad
  forward : leaky_relu (Tensor x, float alpha) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float alpha)
  ...
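For context (the standard layer-norm backward, stated here for reference rather than taken from the kernels): with \hat{x}_{ij} = (x_{ij} - \mu_i)/\sqrt{\sigma_i^2 + \epsilon} and g_{ij} = \gamma_j\,\partial L/\partial\,\mathrm{out}_{ij},

\frac{\partial L}{\partial \beta_j} = \sum_i \frac{\partial L}{\partial\,\mathrm{out}_{ij}}, \qquad
\frac{\partial L}{\partial \gamma_j} = \sum_i \frac{\partial L}{\partial\,\mathrm{out}_{ij}}\,\hat{x}_{ij}, \qquad
\frac{\partial L}{\partial x_{ij}} = \frac{1}{\sqrt{\sigma_i^2 + \epsilon}}\Bigl(g_{ij} - \frac{1}{H}\sum_k g_{ik} - \hat{x}_{ij}\,\frac{1}{H}\sum_k g_{ik}\hat{x}_{ik}\Bigr),

which is why the grad kernel takes x, scale, mean, variance, and out_grad as inputs.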
{
-    "phi_apis":["conj", "dropout", "expand_as", "nll_loss", "psroi_pool", "roi_align", "roi_pool", "label_smooth"],
+    "phi_apis":["conj", "dropout", "expand_as", "nll_loss", "psroi_pool", "roi_align", "roi_pool", "label_smooth", "layer_norm"],
     "phi_kernels":["equal_all"]
}