From c7b5ac4bb6aaf94adef0406bec8605ff3e8c30b6 Mon Sep 17 00:00:00 2001 From: zhang wenhui Date: Wed, 2 Sep 2020 17:23:56 +0800 Subject: [PATCH] fix norm bug, test=develop (#26827) * fix norm bug, test=develop * fix norm bug, test=develop * fix norm bug, test=develop * fix norm bug, test=develop * fix norm bug, test=develop --- .../tests/unittests/test_batch_norm_op_v2.py | 18 ++++ .../tests/unittests/test_group_norm_op_v2.py | 19 +++-- .../unittests/test_instance_norm_op_v2.py | 6 ++ python/paddle/nn/functional/norm.py | 21 +++-- python/paddle/nn/layer/norm.py | 84 +++++++++++++------ 5 files changed, 113 insertions(+), 35 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index 5c705378e5..2af0b31d6f 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -43,6 +43,21 @@ class TestBatchNorm(unittest.TestCase): x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') x_data_3 = np.random.random(size=(2, 1, 3)).astype('float32') + def error1d_dataformat(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + batch_norm1d = paddle.nn.BatchNorm1d(1, data_format='NCDHW') + batch_norm1d(fluid.dygraph.to_variable(x_data_4)) + + def error2d_dataformat(): + x_data_3 = np.random.random(size=(2, 1, 3)).astype('float32') + batch_norm2d = paddle.nn.BatchNorm2d(1, data_format='NCDHW') + batch_norm2d(fluid.dygraph.to_variable(x_data_3)) + + def error3d_dataformat(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + batch_norm3d = paddle.nn.BatchNorm3d(1, data_format='NCL') + batch_norm3d(fluid.dygraph.to_variable(x_data_4)) + def error1d(): x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') batch_norm1d = paddle.nn.BatchNorm1d(1) @@ -62,6 +77,9 @@ class TestBatchNorm(unittest.TestCase): self.assertRaises(ValueError, error1d) self.assertRaises(ValueError, error2d) self.assertRaises(ValueError, error3d) + self.assertRaises(ValueError, error1d_dataformat) + self.assertRaises(ValueError, error2d_dataformat) + self.assertRaises(ValueError, error3d_dataformat) def test_dygraph(self): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py index 654e8d6f12..a46b9b0ca7 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py @@ -35,24 +35,33 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v1(x): with fluid.dygraph.guard(p): - gn = fluid.dygraph.GroupNorm(channels=2, groups=2) + gn = fluid.dygraph.GroupNorm(channels=6, groups=2) y = gn(fluid.dygraph.to_variable(x)) return y.numpy() def compute_v2(x): with fluid.dygraph.guard(p): - gn = paddle.nn.GroupNorm(num_channels=2, num_groups=2) + gn = paddle.nn.GroupNorm(num_channels=6, num_groups=2) y = gn(fluid.dygraph.to_variable(x)) return y.numpy() + def test_weight_bias_false(): + with fluid.dygraph.guard(p): + gn = paddle.nn.GroupNorm( + num_channels=6, + num_groups=2, + weight_attr=False, + bias_attr=False) + x = np.random.randn(*shape).astype("float32") y1 = compute_v1(x) y2 = compute_v2(x) self.assertTrue(np.allclose(y1, y2)) + test_weight_bias_false() def test_static(self): places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): + if core.is_compiled_with_cuda() and core.op_support_gpu("group_norm"): places.append(fluid.CUDAPlace(0)) for p in places: exe = fluid.Executor(p) @@ -60,7 +69,7 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v1(x_np): with program_guard(Program(), Program()): - gn = fluid.dygraph.GroupNorm(channels=2, groups=2) + gn = fluid.dygraph.GroupNorm(channels=6, groups=2) x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = gn(x) exe.run(fluid.default_startup_program()) @@ -69,7 +78,7 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v2(x_np): with program_guard(Program(), Program()): - gn = paddle.nn.GroupNorm(num_channels=2, num_groups=2) + gn = paddle.nn.GroupNorm(num_channels=6, num_groups=2) x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = gn(x) exe.run(fluid.default_startup_program()) diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py index b02ba1a584..c45c144e3a 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py @@ -48,7 +48,13 @@ class TestInstanceNorm(unittest.TestCase): instance_norm3d = paddle.nn.BatchNorm3d(1) instance_norm3d(fluid.dygraph.to_variable(x_data_4)) + def weight_bias_false(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + instance_norm3d = paddle.nn.BatchNorm3d( + 1, weight_attr=False, bias_attr=False) + with fluid.dygraph.guard(p): + weight_bias_false() self.assertRaises(ValueError, error1d) self.assertRaises(ValueError, error2d) self.assertRaises(ValueError, error3d) diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index e9c1a21ecf..f63fc33525 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -165,7 +165,7 @@ def batch_norm(x, w = paddle.to_tensor(weight_data) b = paddle.to_tensor(bias_data) batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b) - print batch_norm_out + print(batch_norm_out.numpy()) """ assert len(x.shape) >= 2, "input dim must be larger than 1" @@ -176,6 +176,15 @@ def batch_norm(x, mean_out = running_mean variance_out = running_var + true_data_format = ['NC', 'NCL', 'NCHW', 'NCWH', 'NCDHW'] + if data_format not in true_data_format: + raise ValueError( + "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCWH', 'NCDHW', but receive {}". + format(data_format)) + + if data_format != 'NCWH': + data_format = 'NCHW' + if in_dygraph_mode(): # for dygraph need tuple attrs = ("momentum", momentum, "epsilon", epsilon, "data_layout", @@ -270,7 +279,7 @@ def layer_norm(x, layer_norm = paddle.nn.functional.layer_norm(x, x.shape[1:]) layer_norm_out = layer_norm(x) - print(layer_norm_out.numpy) + print(layer_norm_out.numpy()) """ input_shape = list(x.shape) input_ndim = len(input_shape) @@ -302,10 +311,10 @@ def layer_norm(x, # create output helper = LayerHelper('layer_norm', **locals()) mean_out = helper.create_variable_for_type_inference( - dtype=x.type, stop_gradient=True) + dtype=x.dtype, stop_gradient=True) variance_out = helper.create_variable_for_type_inference( - dtype=x.type, stop_gradient=True) - layer_norm_out = helper.create_variable_for_type_inference(x.type) + dtype=x.dtype, stop_gradient=True) + layer_norm_out = helper.create_variable_for_type_inference(x.dtype) helper.append_op( type="layer_norm", @@ -362,7 +371,7 @@ def instance_norm(x, x = paddle.to_tensor(x_data) instance_norm_out = paddle.nn.functional.instancenorm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 4d25418579..8bdb09c769 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -78,7 +78,7 @@ class _InstanceNormBase(layers.Layer): super(_InstanceNormBase, self).__init__() if weight_attr == False or bias_attr == False: - assert weight_attr == param_attr, "weight_attr and bias_attr must be set to Fasle at the same time in InstanceNorm" + assert weight_attr == bias_attr, "weight_attr and bias_attr must be set to Fasle at the same time in InstanceNorm" self._epsilon = epsilon self._weight_attr = weight_attr self._bias_attr = bias_attr @@ -176,7 +176,7 @@ class InstanceNorm1d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm1d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ @@ -253,7 +253,7 @@ class InstanceNorm2d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm2d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ def _check_input_dim(self, input): @@ -329,7 +329,7 @@ class InstanceNorm3d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm3d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ def _check_input_dim(self, input): @@ -346,8 +346,8 @@ class GroupNorm(layers.Layer): Refer to `Group Normalization `_ . Parameters: - num_channels(int): The number of channels of input. num_groups(int): The number of groups that divided from channels. + num_channels(int): The number of channels of input. epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-05. weight_attr(ParamAttr|bool, optional): The parameter attribute for the learnable @@ -375,19 +375,19 @@ class GroupNorm(layers.Layer): np.random.seed(123) x_data = np.random.random(size=(2, 6, 2, 2)).astype('float32') x = paddle.to_tensor(x_data) - group_norm = paddle.nn.GroupNorm(num_channels=3, num_groups=6) + group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) group_norm_out = group_norm(x) - print(group_norm_out.numpy) + print(group_norm_out.numpy()) """ def __init__(self, - num_channels, num_groups, + num_channels, epsilon=1e-05, weight_attr=None, bias_attr=None, - data_layout='NCHW', + data_format='NCHW', name=None): super(GroupNorm, self).__init__() self._weight_attr = weight_attr @@ -395,18 +395,33 @@ class GroupNorm(layers.Layer): self._epsilon = epsilon self._num_channels = num_channels self._num_groups = num_groups - if data_layout != 'NCHW': + if data_format != 'NCHW': raise ValueError("unsupported data layout:" + data_layout) param_shape = [self._num_channels] - self.weight = self.create_parameter( - attr=self._weight_attr or False, - shape=param_shape, - default_initializer=Constant(1.0)) + if weight_attr == False: + self.weight = self.create_parameter( + attr=None, shape=param_shape, default_initializer=Constant(1.0)) + self.weight.stop_gradient = True + else: + self.weight = self.create_parameter( + attr=self._weight_attr, + shape=param_shape, + default_initializer=Constant(1.0)) + self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. - self.bias = self.create_parameter( - attr=self._weight_attr or False, shape=param_shape, is_bias=True) + if bias_attr == False: + self.bias = self.create_parameter( + attr=None, + shape=param_shape, + default_initializer=Constant(0.0), + is_bias=True) + self.bias.stop_gradient = True + else: + self.bias = self.create_parameter( + attr=self._bias_attr, shape=param_shape, is_bias=True) + self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0. def forward(self, input): inputs = {'X': input} @@ -500,7 +515,7 @@ class LayerNorm(layers.Layer): layer_norm = paddle.nn.LayerNorm(x_data.shape[1:]) layer_norm_out = layer_norm(x) - print(layer_norm_out.numpy) + print(layer_norm_out.numpy()) """ def __init__(self, @@ -603,8 +618,7 @@ class _BatchNormBase(layers.Layer): initializer=Constant(0.0), trainable=False, do_model_average=True), - shape=param_shape, - dtype=self._dtype) + shape=param_shape) self._mean.stop_gradient = True self._variance = self.create_parameter( @@ -613,8 +627,7 @@ class _BatchNormBase(layers.Layer): initializer=Constant(1.0), trainable=False, do_model_average=True), - shape=param_shape, - dtype=self._dtype) + shape=param_shape) self._variance.stop_gradient = True self._data_format = data_format @@ -628,8 +641,13 @@ class _BatchNormBase(layers.Layer): def _check_input_dim(self, input): raise NotImplementedError("BatchNorm Base error") + def _check_data_format(self, input): + raise NotImplementedError("BatchNorm Base data format error") + def forward(self, input): + self._check_data_format(self._data_format) + self._check_input_dim(input) if not self.training and not self._track_running_stats: @@ -730,9 +748,15 @@ class BatchNorm1d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm1d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW' or input == 'NC' or input == 'NCL': + self._data_format = 'NCHW' + else: + raise ValueError('expected NC , NCL or None for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 2 and len(input.shape) != 3: raise ValueError('expected 2D or 3D input (got {}D input)'.format( @@ -816,9 +840,15 @@ class BatchNorm2d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm2d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW' or input == 'NCWH': + self._data_format = input + else: + raise ValueError('expected NCHW or NCWH for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 4: raise ValueError('expected 4D input (got {}D input)'.format( @@ -902,9 +932,15 @@ class BatchNorm3d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm3d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW' or input == 'NCDHW': + self._data_format = 'NCHW' + else: + raise ValueError('expected NCDHW or None for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 5: raise ValueError('expected 5D input (got {}D input)'.format( -- GitLab