Unverified commit 57e097ac authored by LiYuRio, committed by GitHub

[Fluid API] Move instance_norm, group_norm, data_norm from fluid to static (#48448)

* move instance_norm from fluid to static

* move group_norm, data_norm to static
Parent fa051eec
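The call-site change applied throughout the tests below is a pure namespace move; a minimal before/after sketch (shapes and variable names here are illustrative, not taken from the PR):
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[2, 8, 32, 32], dtype='float32')
# before this PR: out = paddle.fluid.layers.nn.group_norm(x, groups=4)
out = paddle.static.nn.group_norm(input=x, groups=4)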
......@@ -73,8 +73,6 @@ __all__ = [
'pool2d',
'pool3d',
'batch_norm',
'instance_norm',
'data_norm',
'reduce_mean',
'reduce_all',
'reduce_any',
......@@ -88,7 +86,6 @@ __all__ = [
'row_conv',
'multiplex',
'layer_norm',
'group_norm',
'spectral_norm',
'smooth_l1',
'one_hot',
......@@ -2462,349 +2459,6 @@ def batch_norm(
return helper.append_activation(batch_norm_out)
def instance_norm(
input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
):
r"""
:api_attr: Static Graph
**Instance Normalization Layer**
Can be used as a normalizer function for convolution or fully_connected operations.
The required data format for this layer is one of the following:
DataLayout: NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Instance Normalization: The Missing Ingredient for
Fast Stylization <https://arxiv.org/pdf/1607.08022.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::
\mu_{\beta} &\gets \frac{1}{HW} \sum_{i=1}^{HW} x_i \qquad &// \text{mean of one feature map in mini-batch} \\
\sigma_{\beta}^{2} &\gets \frac{1}{HW} \sum_{i=1}^{HW}(x_i - \mu_{\beta})^2 \qquad &// \text{variance of one feature map in mini-batch} \\
\hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &// \text{normalize} \\
y_i &\gets \gamma \hat{x_i} + \beta \qquad &// \text{scale and shift}
Note:
`H` means height of feature map, `W` means width of feature map.
Args:
input(Tensor): The rank of input tensor can be 2, 3, 4, 5.
The data type is float32 or float64.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. If the param_attr is set to False, instance_norm will not create param_attr.
Default: None.
bias_attr(ParamAttr|None|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
If the bias_attr is set to False, instance_norm will not create bias_attr.
Default: None.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
A Tensor which is the result of applying instance normalization on the input, with the same shape and data type as the input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
hidden1 = paddle.static.nn.fc(x, size=200)
hidden2 = paddle.static.nn.instance_norm(hidden1)
"""
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'instance_norm'
)
if param_attr is False:
assert (
bias_attr is False
), "param_attr and bias_attr must be set to False at the same time in instance_norm"
helper = LayerHelper('instance_norm', **locals())
dtype = helper.input_dtype()
# use fp32 for in parameter
if dtype == core.VarDesc.VarType.FP16:
dtype = core.VarDesc.VarType.FP32
input_shape = input.shape
if len(input.shape) < 2 or len(input.shape) > 5:
raise ValueError(
'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'.format(
len(input.shape), input_shape
)
)
channel_num = input_shape[1]
param_shape = [channel_num]
if param_attr != False and bias_attr != False:
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=param_shape,
dtype=dtype,
is_bias=True,
default_initializer=Constant(0.0),
)
# create output
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
instance_norm_out = helper.create_variable_for_type_inference(dtype)
inputs = {"X": input}
if param_attr != False and bias_attr != False:
inputs["Scale"] = scale
inputs["Bias"] = bias
helper.append_op(
type="instance_norm",
inputs=inputs,
outputs={
"Y": instance_norm_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance,
},
attrs={
"epsilon": epsilon,
},
)
return instance_norm_out
@static_only
def data_norm(
input,
act=None,
epsilon=1e-05,
param_attr=None,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
slot_dim=-1,
sync_stats=False,
summary_decay_rate=0.9999999,
enable_scale_and_shift=False,
):
r"""
:api_attr: Static Graph
**Data Normalization Layer**
This op can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
:math:`input` is the input features over a mini-batch.
.. math::
\mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &// \text{mini-batch mean} \\
\sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad &// \text{mini-batch variance} \\
\hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &// \text{normalize} \\
y_i &\gets \gamma \hat{x_i} + \beta \qquad &// \text{scale and shift}
Args:
input(Tensor): The input Tensor.
act(string, Default None): Activation type, linear|relu|prelu|...
epsilon(float, Default 1e-05): A small value added to the variance for numerical stability. Default is 1e-5.
param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
data_layout (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
in_place(bool, Default False): Make the input and output of data norm reuse memory.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
moving_mean_name(string, Default None): The name of the moving_mean which stores the global mean.
moving_variance_name(string, Default None): The name of the moving_variance which stores the global variance.
do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance
should do model average when model average is enabled.
slot_dim(int): The embedding dimension of one slot. A slot is a set of one specific feature. In pslib mode, we
distinguish feature ids by slot and pull their embeddings from the parameter server (pslib). The first
place of the embedding is the historical show number (the number of occurrences of this feature id with label 0).
If the input of this op is concatenated from slot-wise embeddings, and the show number is zero when this slot
is new or empty, the normalization result may be impractical. To avoid this, we add slot_dim to locate
the show number and check whether it is zero. If so, normalization is skipped for this
embedding.
sync_stats(bool, Default False): When running with multiple GPU cards, use allreduce to sync the
summary messages.
summary_decay_rate(float, Default 0.9999999): The decay rate when updating the summary.
enable_scale_and_shift(bool, Default False): Whether to apply scale and shift after normalization.
Returns:
Tensor: A tensor which is the result after applying data normalization on the input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.randn(shape=[32,100])
hidden2 = paddle.static.nn.data_norm(input=x)
"""
helper = LayerHelper('data_norm', **locals())
dtype = helper.input_dtype()
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[1]
else:
if data_layout == 'NHWC':
channel_num = input_shape[-1]
else:
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
batch_size_default = 1e4
batch_sum_default = 0.0
batch_square_sum_default = 1e4
scale_w_default = 1.0
bias_default = 0.0
if param_attr and isinstance(param_attr, dict):
batch_size_default = param_attr.get("batch_size", 1e4)
batch_sum_default = param_attr.get("batch_sum", 0.0)
batch_square_sum_default = param_attr.get("batch_square", 1e4)
if enable_scale_and_shift:
scale_w_default = param_attr.get("scale_w", 1.0)
bias_default = param_attr.get("bias", 0.0)
# create scale and shift(bias) when enable_scale_and_shift is True
if name is None:
name = "dn"
if enable_scale_and_shift:
scale_w = helper.create_parameter(
attr=ParamAttr(
name=name + '.scale_w',
initializer=Constant(value=float(scale_w_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
bias = helper.create_parameter(
attr=ParamAttr(
name=name + '.bias',
initializer=Constant(value=float(bias_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
# create parameter
batch_size = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_size',
initializer=Constant(value=float(batch_size_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
batch_sum = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_sum',
initializer=Constant(value=float(batch_sum_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
batch_square_sum = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_square_sum',
initializer=Constant(value=float(batch_square_sum_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
means = helper.create_variable(dtype=dtype, stop_gradient=True)
scales = helper.create_variable(dtype=dtype, stop_gradient=True)
data_norm_out = input if in_place else helper.create_variable(dtype=dtype)
inputs = {
"X": input,
"BatchSize": batch_size,
"BatchSum": batch_sum,
"BatchSquareSum": batch_square_sum,
}
attrs = {
"epsilon": epsilon,
"data_layout": data_layout,
"sync_stats": sync_stats,
"summary_decay_rate": summary_decay_rate,
}
if slot_dim > 0:
attrs["slot_dim"] = slot_dim
if enable_scale_and_shift:
attrs["enable_scale_and_shift"] = enable_scale_and_shift
if enable_scale_and_shift:
inputs["scale_w"] = scale_w
inputs["bias"] = bias
helper.append_op(
type="data_norm",
inputs=inputs,
outputs={
"Y": data_norm_out,
"Means": means,
"Scales": scales,
"BatchSize": batch_size,
"BatchSum": batch_sum,
"BatchSquareSum": batch_square_sum,
},
attrs=attrs,
)
return helper.append_activation(data_norm_out)
@templatedoc()
def layer_norm(
input,
......@@ -2941,116 +2595,6 @@ def layer_norm(
return helper.append_activation(layer_norm_out)
@templatedoc()
def group_norm(
input,
groups,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None,
data_layout='NCHW',
name=None,
):
"""
:api_attr: Static Graph
**Group Normalization Layer**
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .
Parameters:
input(Tensor): Tensor with dimension greater than 1, the data type is float32 or float64.
groups(int): The number of groups that divided from channels, the data type
is int32.
epsilon(float, optional): The small value added to the variance to prevent
division by zero, the data type is float32. Default: 1e-05.
param_attr(ParamAttr|bool, optional): ParamAttr object that specifies weight parameter
attribute. If a bool type, only False is supported, which means there is no weight parameter.
Default: None, the default weight parameter attribute is used. For more information, please
refer to :ref:`api_guide_ParamAttr` .
bias_attr(ParamAttr|bool, optional): ParamAttr object that specifies bias parameter
attribute. If a bool type, only False is supported, which means there is no bias parameter.
Default: None, the default bias parameter attribute is used. For more information, please
refer to :ref:`api_guide_ParamAttr` .
act(str, optional): Activation to be applied to the output of group normalization.
data_layout(str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, *]`.
name (str, optional): The default value is None. Normally there is no need for user to set this
property. For more information, please refer to :ref:`api_guide_Name` .
Returns:
Tensor: A Tensor with the same data type and data format as `input`.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name='data', shape=[2, 8, 32, 32], dtype='float32')
x = paddle.static.nn.group_norm(input=data, groups=4)
print(x.shape) # [2, 8, 32, 32]
"""
helper = LayerHelper('group_norm', **locals())
dtype = helper.input_dtype()
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'group_norm'
)
# create input and parameters
inputs = {'X': input}
input_shape = input.shape
if len(input_shape) < 2:
raise ValueError(
f"The dimensions of Op(fluid.layers.group_norm)'s input should be more than 1. But received {len(input_shape)}"
)
if data_layout != 'NCHW' and data_layout != 'NHWC':
raise ValueError(
"Param(data_layout) of Op(fluid.layers.group_norm) got wrong value: received "
+ data_layout
+ " but only NCHW or NHWC supported."
)
channel_num = input_shape[1] if data_layout == 'NCHW' else input_shape[-1]
param_shape = [channel_num]
if param_attr:
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
inputs['Scale'] = scale
if bias_attr:
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
)
inputs['Bias'] = bias
# create output
mean_out = helper.create_variable(dtype=dtype, stop_gradient=True)
variance_out = helper.create_variable(dtype=dtype, stop_gradient=True)
group_norm_out = helper.create_variable(dtype=dtype)
helper.append_op(
type="group_norm",
inputs=inputs,
outputs={
"Y": group_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={
"epsilon": epsilon,
"groups": groups,
"data_layout": data_layout,
},
)
return helper.append_activation(group_norm_out)
@templatedoc()
def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None):
r"""
......
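The instance_norm docstring above describes per-sample, per-channel normalization over H*W. A minimal NumPy sketch of that math for a 4-D NCHW input (the helper name is mine, and the default scale/shift of 1 and 0 are assumed):
import numpy as np

def instance_norm_ref(x, epsilon=1e-5):
    # x: NCHW; statistics are taken per sample and per channel over H*W
    mean = x.mean(axis=(2, 3), keepdims=True)
    var = x.var(axis=(2, 3), keepdims=True)
    # gamma = 1, beta = 0, i.e. the default scale and shift
    return (x - mean) / np.sqrt(var + epsilon)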
......@@ -63,7 +63,7 @@ class TestBase(IPUOpTest):
)
scale = paddle.ParamAttr(trainable=True)
bias = paddle.ParamAttr(trainable=True)
out = paddle.fluid.layers.nn.group_norm(
out = paddle.static.nn.group_norm(
conv1, param_attr=scale, bias_attr=bias, **self.attrs
)
loss = paddle.mean(out)
......@@ -71,7 +71,7 @@ class TestBase(IPUOpTest):
adam.minimize(loss)
self.fetch_list = [loss.name]
else:
out = paddle.fluid.layers.nn.group_norm(
out = paddle.static.nn.group_norm(
x, param_attr=True, bias_attr=True, **self.attrs
)
self.fetch_list = [out.name]
......
......@@ -60,7 +60,7 @@ class TestBase(IPUOpTest):
)
scale = paddle.ParamAttr(trainable=True)
bias = paddle.ParamAttr(trainable=True)
out = paddle.fluid.layers.nn.instance_norm(
out = paddle.static.nn.instance_norm(
conv1, param_attr=scale, bias_attr=bias, **self.attrs
)
loss = paddle.mean(out)
......@@ -68,7 +68,7 @@ class TestBase(IPUOpTest):
adam.minimize(loss)
self.fetch_list = [loss.name]
else:
out = paddle.fluid.layers.nn.instance_norm(
out = paddle.static.nn.instance_norm(
x, param_attr=True, bias_attr=True, **self.attrs
)
self.fetch_list = [out.name]
......
......@@ -20,6 +20,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
......@@ -43,7 +44,7 @@ class TRTInstanceNormTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
shape = [-1, self.channel, self.height, self.width]
data = fluid.data(name='in', shape=shape, dtype='float32')
instance_norm_out = fluid.layers.instance_norm(data)
instance_norm_out = paddle.static.nn.instance_norm(data)
out = fluid.layers.batch_norm(instance_norm_out, is_test=True)
shape[0] = self.bs
......
......@@ -186,7 +186,7 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest):
name='instance_norm_b',
initializer=fluid.initializer.Constant(value=0.0),
)
out = fluid.layers.instance_norm(
out = paddle.static.nn.instance_norm(
input=data, param_attr=param_attr, bias_attr=bias_attr
)
self.feeds = {
......
......@@ -52,7 +52,7 @@ class TestGroupNormOpError(unittest.TestCase):
def test_x_type():
input = np.random.random(2, 100, 3, 5).astype('float32')
groups = 2
fluid.layers.group_norm(input, groups)
paddle.static.nn.group_norm(input, groups)
self.assertRaises(TypeError, test_x_type)
......@@ -61,7 +61,7 @@ class TestGroupNormOpError(unittest.TestCase):
name='x2', shape=[2, 100, 3, 5], dtype='int32'
)
groups = 2
fluid.layers.group_norm(x2, groups)
paddle.static.nn.group_norm(x2, groups)
self.assertRaises(TypeError, test_x_dtype)
......@@ -219,7 +219,7 @@ class TestGroupNormException(unittest.TestCase):
data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64")
def attr_data_format():
out = fluid.layers.group_norm(
out = paddle.static.nn.group_norm(
input=data, groups=2, data_layout="NDHW"
)
......
......@@ -15,6 +15,7 @@
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid as fluid
......@@ -517,7 +518,7 @@ class TestDataNormOpErrorr(unittest.TestCase):
with program_guard(Program(), Program()):
x2 = fluid.layers.data(name='x2', shape=[3, 4], dtype="int32")
# self.assertRaises(TypeError, fluid.data_norm, x2)
fluid.layers.data_norm(
paddle.static.nn.data_norm(
input=x2, param_attr={}, enable_scale_and_shift=True
)
......
......@@ -89,7 +89,7 @@ class TestPSPassWithBow(unittest.TestCase):
# vsum
q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
q_ss = paddle.nn.functional.softsign(q_sum)
q_ss = fluid.layers.data_norm(input=q_ss)
q_ss = paddle.static.nn.data_norm(input=q_ss)
# fc layer after conv
q_fc = fluid.layers.fc(
input=q_ss,
......
......@@ -32,6 +32,7 @@ class TestFleet1(unittest.TestCase):
def test_pslib_1(self):
"""Test cases for pslib."""
import paddle
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
......@@ -66,7 +67,9 @@ class TestFleet1(unittest.TestCase):
param_attr=fluid.ParamAttr(name="embedding"),
)
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow = fluid.layers.data_norm(input=bow, epsilon=1e-4, name="norm")
bow = paddle.static.nn.data_norm(
input=bow, epsilon=1e-4, name="norm"
)
fc = fluid.layers.fc(input=bow, size=1, act=None)
label = fluid.layers.data(
name="click",
......
......@@ -15,6 +15,7 @@
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from op_test import OpTest, skip_check_grad_ci
......@@ -46,7 +47,7 @@ class TestGroupNormOpError(unittest.TestCase):
def test_x_type():
input = np.random.random(2, 100, 3, 5).astype('float32')
groups = 2
fluid.layers.group_norm(input, groups)
paddle.static.nn.group_norm(input, groups)
self.assertRaises(TypeError, test_x_type)
......@@ -55,7 +56,7 @@ class TestGroupNormOpError(unittest.TestCase):
name='x2', shape=[2, 100, 3, 5], dtype='int32'
)
groups = 2
fluid.layers.group_norm(x2, groups)
paddle.static.nn.group_norm(x2, groups)
self.assertRaises(TypeError, test_x_dtype)
......@@ -245,11 +246,11 @@ class TestGroupNormOpLargeData_With_NHWC(TestGroupNormOp):
class TestGroupNormAPI_With_NHWC(unittest.TestCase):
def test_case1(self):
data1 = fluid.data(name='data1', shape=[None, 3, 3, 4], dtype='float64')
out1 = fluid.layers.group_norm(
out1 = paddle.static.nn.group_norm(
input=data1, groups=2, data_layout="NHWC"
)
data2 = fluid.data(name='data2', shape=[None, 4, 3, 3], dtype='float64')
out2 = fluid.layers.group_norm(
out2 = paddle.static.nn.group_norm(
input=data2, groups=2, data_layout="NCHW"
)
......@@ -282,7 +283,7 @@ class TestGroupNormException(unittest.TestCase):
data = fluid.data(name='data', shape=[None, 3, 3, 4], dtype="float64")
def attr_data_format():
out = fluid.layers.group_norm(
out = paddle.static.nn.group_norm(
input=data, groups=2, data_layout="NDHW"
)
......
......@@ -636,7 +636,7 @@ class TestRaiseNoDoubleGradOp(TestCase):
with fluid.dygraph.guard():
x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32')
x.stop_gradient = False
y = paddle.fluid.layers.group_norm(x, groups=1)
y = paddle.static.nn.group_norm(x, groups=1)
dx = fluid.dygraph.grad(
outputs=[y], inputs=[x], create_graph=True, retain_graph=True
......
......@@ -123,8 +123,12 @@ class TestDygraphLoadStatic(unittest.TestCase):
groupnorm_in = fluid.data(
name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32'
)
groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
groupnorm_out1 = paddle.static.nn.group_norm(
input=groupnorm_in, groups=4
)
groupnorm_out2 = paddle.static.nn.group_norm(
input=groupnorm_in, groups=4
)
'''
spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32')
spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
......
......@@ -120,7 +120,7 @@ class InstanceNorm(fluid.dygraph.Layer):
)
return out
else:
return fluid.layers.instance_norm(
return paddle.static.nn.instance_norm(
input,
epsilon=self.epsilon,
param_attr=fluid.ParamAttr(self.scale.name),
......
......@@ -239,11 +239,11 @@ class TestInstanceNormOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()
)
self.assertRaises(TypeError, fluid.layers.instance_norm, x1)
self.assertRaises(TypeError, paddle.static.nn.instance_norm, x1)
# the input dtype of instance_norm must be float32 or float64
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32")
self.assertRaises(TypeError, fluid.layers.instance_norm, x2)
self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2)
class TestInstanceNormOpErrorCase1(unittest.TestCase):
......
......@@ -1880,7 +1880,7 @@ class TestLayer(LayerTest):
lod_level=1,
append_batch_size=False,
)
ret = layers.group_norm(
ret = paddle.static.nn.group_norm(
input=X,
groups=2,
param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5),
......@@ -1953,7 +1953,7 @@ class TestLayer(LayerTest):
X = fluid.layers.data(
name='X', shape=shape, dtype='float32', append_batch_size=False
)
ret = layers.instance_norm(input=X)
ret = paddle.static.nn.instance_norm(input=X)
static_ret = self.get_static_graph_result(
feed={'X': input}, fetch_list=[ret]
)[0]
......
......@@ -35,7 +35,7 @@ class TestInstanceNormDoubleGradCheck(unittest.TestCase):
eps = 0.005
atol = 1e-4
x = layers.create_parameter(dtype=dtype, shape=shape, name='x')
z = fluid.layers.instance_norm(input=x)
z = paddle.static.nn.instance_norm(input=x)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], z, x_init=x_arr, atol=atol, place=place, eps=eps
......@@ -63,7 +63,7 @@ class TestInstanceNormDoubleGradCheckWithoutParamBias(
eps = 0.005
atol = 1e-4
x = layers.create_parameter(dtype=dtype, shape=shape, name='x')
z = fluid.layers.instance_norm(
z = paddle.static.nn.instance_norm(
input=x, param_attr=False, bias_attr=False
)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
......
......@@ -13,6 +13,9 @@
# limitations under the License.
from .common import fc # noqa: F401
from .common import instance_norm # noqa: F401
from .common import data_norm # noqa: F401
from .common import group_norm # noqa: F401
from .common import deform_conv2d # noqa: F401
from .common import conv3d # noqa: F401
from .common import conv2d_transpose # noqa: F401
......@@ -25,9 +28,6 @@ from ...fluid.layers import cond # noqa: F401
from ...fluid.layers import conv2d # noqa: F401
from ...fluid.layers import create_parameter # noqa: F401
from ...fluid.layers import crf_decoding # noqa: F401
from ...fluid.layers import data_norm # noqa: F401
from ...fluid.layers import group_norm # noqa: F401
from ...fluid.layers import instance_norm # noqa: F401
from ...fluid.layers import layer_norm # noqa: F401
from ...fluid.layers import multi_box_head # noqa: F401
from .loss import nce # noqa: F401
......
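After this re-wiring, the public entry points keep their old names under paddle.static.nn but resolve to the definitions added to common.py below. A hedged usage sketch, adapted from the data_norm docstring example:
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[32, 100], dtype='float32')
# same public name as before the move
out = paddle.static.nn.data_norm(input=x)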
......@@ -13,8 +13,10 @@
# limitations under the License.
import paddle
from paddle.fluid.initializer import Normal
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Constant
from paddle.fluid.framework import static_only, Variable, _non_static_mode
from paddle.fluid.layers.layer_function_generator import templatedoc
from paddle.fluid.data_feeder import check_dtype
......@@ -177,6 +179,459 @@ def fc(
)
def instance_norm(
input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
):
r"""
:api_attr: Static Graph
**Instance Normalization Layer**
Can be used as a normalizer function for convolution or fully_connected operations.
The required data format for this layer is one of the following:
DataLayout: NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Instance Normalization: The Missing Ingredient for
Fast Stylization <https://arxiv.org/pdf/1607.08022.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::
\mu_{\beta} &\gets \frac{1}{HW} \sum_{i=1}^{HW} x_i \qquad &// \text{mean of one feature map in mini-batch} \\
\sigma_{\beta}^{2} &\gets \frac{1}{HW} \sum_{i=1}^{HW}(x_i - \mu_{\beta})^2 \qquad &// \text{variance of one feature map in mini-batch} \\
\hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &// \text{normalize} \\
y_i &\gets \gamma \hat{x_i} + \beta \qquad &// \text{scale and shift}
Note:
`H` means height of feature map, `W` means width of feature map.
Args:
input(Tensor): The rank of input tensor can be 2, 3, 4, 5.
The data type is float32 or float64.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None|bool, optional): The parameter attribute for Parameter `scale`
of instance_norm. If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. If the param_attr is set to False, instance_norm will not create param_attr.
Default: None.
bias_attr(ParamAttr|None|bool, optional): The parameter attribute for the bias of instance_norm.
If it is set to None or one attribute of ParamAttr, instance_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
If the bias_attr is set to False, instance_norm will not create bias_attr.
Default: None.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
A Tensor which is the result of applying instance normalization on the input, with the same shape and data type as the input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
hidden1 = paddle.static.nn.fc(x, size=200)
hidden2 = paddle.static.nn.instance_norm(hidden1)
"""
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'instance_norm'
)
if param_attr is False:
assert (
bias_attr is False
), "param_attr and bias_attr must be set to False at the same time in instance_norm"
helper = LayerHelper('instance_norm', **locals())
dtype = helper.input_dtype()
# use fp32 for in parameter
if dtype == paddle.framework.core.VarDesc.VarType.FP16:
dtype = paddle.framework.core.VarDesc.VarType.FP32
input_shape = input.shape
if len(input.shape) < 2 or len(input.shape) > 5:
raise ValueError(
'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'.format(
len(input.shape), input_shape
)
)
channel_num = input_shape[1]
param_shape = [channel_num]
if param_attr and bias_attr:
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=param_shape,
dtype=dtype,
is_bias=True,
default_initializer=Constant(0.0),
)
# create output
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
instance_norm_out = helper.create_variable_for_type_inference(dtype)
inputs = {"X": input}
if param_attr and bias_attr:
inputs["Scale"] = scale
inputs["Bias"] = bias
helper.append_op(
type="instance_norm",
inputs=inputs,
outputs={
"Y": instance_norm_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance,
},
attrs={
"epsilon": epsilon,
},
)
return instance_norm_out
@static_only
def data_norm(
input,
act=None,
epsilon=1e-05,
param_attr=None,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
slot_dim=-1,
sync_stats=False,
summary_decay_rate=0.9999999,
enable_scale_and_shift=False,
):
r"""
:api_attr: Static Graph
**Data Normalization Layer**
This op can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
:math:`input` is the input features over a mini-batch.
.. math::
\mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &// \text{mini-batch mean} \\
\sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad &// \text{mini-batch variance} \\
\hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &// \text{normalize} \\
y_i &\gets \gamma \hat{x_i} + \beta \qquad &// \text{scale and shift}
Args:
input(Tensor): The input Tensor.
act(string, Default None): Activation type, linear|relu|prelu|...
epsilon(float, Default 1e-05): A small value added to the variance for numerical stability. Default is 1e-5.
param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
data_layout (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
in_place(bool, Default False): Make the input and output of data norm reuse memory.
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
moving_mean_name(string, Default None): The name of the moving_mean which stores the global mean.
moving_variance_name(string, Default None): The name of the moving_variance which stores the global variance.
do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance
should do model average when model average is enabled.
slot_dim(int): The embedding dimension of one slot. A slot is a set of one specific feature. In pslib mode, we
distinguish feature ids by slot and pull their embeddings from the parameter server (pslib). The first
place of the embedding is the historical show number (the number of occurrences of this feature id with label 0).
If the input of this op is concatenated from slot-wise embeddings, and the show number is zero when this slot
is new or empty, the normalization result may be impractical. To avoid this, we add slot_dim to locate
the show number and check whether it is zero. If so, normalization is skipped for this
embedding.
sync_stats(bool, Default False): When running with multiple GPU cards, use allreduce to sync the
summary messages.
summary_decay_rate(float, Default 0.9999999): The decay rate when updating the summary.
enable_scale_and_shift(bool, Default False): Whether to apply scale and shift after normalization.
Returns:
Tensor: A tensor which is the result after applying data normalization on the input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.randn(shape=[32,100])
hidden2 = paddle.static.nn.data_norm(input=x)
"""
helper = LayerHelper('data_norm', **locals())
dtype = helper.input_dtype()
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[1]
else:
if data_layout == 'NHWC':
channel_num = input_shape[-1]
else:
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
batch_size_default = 1e4
batch_sum_default = 0.0
batch_square_sum_default = 1e4
scale_w_default = 1.0
bias_default = 0.0
if param_attr and isinstance(param_attr, dict):
batch_size_default = param_attr.get("batch_size", 1e4)
batch_sum_default = param_attr.get("batch_sum", 0.0)
batch_square_sum_default = param_attr.get("batch_square", 1e4)
if enable_scale_and_shift:
scale_w_default = param_attr.get("scale_w", 1.0)
bias_default = param_attr.get("bias", 0.0)
# create scale and shift(bias) when enable_scale_and_shift is True
if name is None:
name = "dn"
if enable_scale_and_shift:
scale_w = helper.create_parameter(
attr=ParamAttr(
name=name + '.scale_w',
initializer=Constant(value=float(scale_w_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
bias = helper.create_parameter(
attr=ParamAttr(
name=name + '.bias',
initializer=Constant(value=float(bias_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
# create parameter
batch_size = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_size',
initializer=Constant(value=float(batch_size_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
batch_sum = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_sum',
initializer=Constant(value=float(batch_sum_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
batch_square_sum = helper.create_parameter(
attr=ParamAttr(
name=name + '.batch_square_sum',
initializer=Constant(value=float(batch_square_sum_default)),
trainable=True,
),
shape=param_shape,
dtype=input.dtype,
)
means = helper.create_variable(dtype=dtype, stop_gradient=True)
scales = helper.create_variable(dtype=dtype, stop_gradient=True)
data_norm_out = input if in_place else helper.create_variable(dtype=dtype)
inputs = {
"X": input,
"BatchSize": batch_size,
"BatchSum": batch_sum,
"BatchSquareSum": batch_square_sum,
}
attrs = {
"epsilon": epsilon,
"data_layout": data_layout,
"sync_stats": sync_stats,
"summary_decay_rate": summary_decay_rate,
}
if slot_dim > 0:
attrs["slot_dim"] = slot_dim
if enable_scale_and_shift:
attrs["enable_scale_and_shift"] = enable_scale_and_shift
if enable_scale_and_shift:
inputs["scale_w"] = scale_w
inputs["bias"] = bias
helper.append_op(
type="data_norm",
inputs=inputs,
outputs={
"Y": data_norm_out,
"Means": means,
"Scales": scales,
"BatchSize": batch_size,
"BatchSum": batch_sum,
"BatchSquareSum": batch_square_sum,
},
attrs=attrs,
)
return helper.append_activation(data_norm_out)
@templatedoc()
def group_norm(
input,
groups,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None,
data_layout='NCHW',
name=None,
):
"""
:api_attr: Static Graph
**Group Normalization Layer**
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .
Parameters:
input(Tensor): Tensor with dimension greater than 1, the data type is float32 or float64.
groups(int): The number of groups that divided from channels, the data type
is int32.
epsilon(float, optional): The small value added to the variance to prevent
division by zero, the data type is float32. Default: 1e-05.
param_attr(ParamAttr|bool, optional): ParamAttr object that specifies weight parameter
attribute. If a bool type, only False is supported, which means there is no weight parameter.
Default: None, the default weight parameter attribute is used. For more information, please
refer to :ref:`api_guide_ParamAttr` .
bias_attr(ParamAttr|bool, optional): ParamAttr object that specifies bias parameter
attribute. If a bool type, only False is supported, which means there is no bias parameter.
Default: None, the default bias parameter attribute is used. For more information, please
refer to :ref:`api_guide_ParamAttr` .
act(str, optional): Activation to be applied to the output of group normalization.
data_layout(str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, *]`.
name (str, optional): The default value is None. Normally there is no need for user to set this
property. For more information, please refer to :ref:`api_guide_Name` .
Returns:
Tensor: A Tensor with the same data type and data format as `input`.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name='data', shape=[2, 8, 32, 32], dtype='float32')
x = paddle.static.nn.group_norm(input=data, groups=4)
print(x.shape) # [2, 8, 32, 32]
"""
helper = LayerHelper('group_norm', **locals())
dtype = helper.input_dtype()
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'group_norm'
)
# create input and parameters
inputs = {'X': input}
input_shape = input.shape
if len(input_shape) < 2:
raise ValueError(
f"The dimensions of Op(static.nn.group_norm)'s input should be more than 1. But received {len(input_shape)}"
)
if data_layout != 'NCHW' and data_layout != 'NHWC':
raise ValueError(
"Param(data_layout) of Op(static.nn.group_norm) got wrong value: received "
+ data_layout
+ " but only NCHW or NHWC supported."
)
channel_num = input_shape[1] if data_layout == 'NCHW' else input_shape[-1]
param_shape = [channel_num]
if param_attr:
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
inputs['Scale'] = scale
if bias_attr:
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
)
inputs['Bias'] = bias
# create output
mean_out = helper.create_variable(dtype=dtype, stop_gradient=True)
variance_out = helper.create_variable(dtype=dtype, stop_gradient=True)
group_norm_out = helper.create_variable(dtype=dtype)
helper.append_op(
type="group_norm",
inputs=inputs,
outputs={
"Y": group_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={
"epsilon": epsilon,
"groups": groups,
"data_layout": data_layout,
},
)
return helper.append_activation(group_norm_out)
def conv3d(
input,
num_filters,
......
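For reference, the group_norm op appended above normalizes each sample over groups of channels. A minimal NumPy sketch of that computation (the helper name is mine; scale, shift, and activation are omitted for brevity):
import numpy as np

def group_norm_ref(x, groups, epsilon=1e-5):
    # x: NCHW; each of the `groups` channel groups is normalized over
    # (C // groups, H, W) independently for every sample
    n, c, h, w = x.shape
    xg = x.reshape(n, groups, c // groups, h, w)
    mean = xg.mean(axis=(2, 3, 4), keepdims=True)
    var = xg.var(axis=(2, 3, 4), keepdims=True)
    return ((xg - mean) / np.sqrt(var + epsilon)).reshape(n, c, h, w)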