未验证 提交 4da1c4f1 编写于 作者: S SunGaofeng 提交者: GitHub

fix g_param shape mismatch in WeightNormParamAttr (#18940)

* fix g_param shape mismatch in WeightNormParamAttr

* add comment to show why insert reshape in startup_program
test=develop
上级 af63b118
......@@ -167,7 +167,7 @@ paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=N
paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', '74498d37dd622ac472cb36887fce09ea'))
paddle.fluid.layers.lod_append (ArgSpec(args=['x', 'level'], varargs=None, keywords=None, defaults=None), ('document', '37663c7c179e920838a250ea0e28d909'))
paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '73d297256da8954617996958d26ee93d'))
paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f189f8ef61f1c23779e1593b78755c0'))
paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '36b6e58678956585e5b30aa3de123a60'))
paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '95aa1972983f30fe9b5a3713e523e20f'))
paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '214f1dfbe95a628600bbe99e836319cf'))
paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'ceedc8c22752c623d6e1ea2e8df0f43f'))
......@@ -994,7 +994,7 @@ paddle.fluid.CUDAPinnedPlace ('paddle.fluid.core_avx.CUDAPinnedPlace', ('documen
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'fa47fa251f727c4a4f638d61e3c7c141'))
paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', '48ab4f49c7eeeade5958b731b6a96aa0'))
paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'b5ae1698ea72d5a9428000b916a67379'))
paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder ('paddle.fluid.data_feeder.DataFeeder', ('document', 'a39802654f20692ad49c340cef7c6556'))
paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
......@@ -177,19 +177,24 @@ class LayerHelperBase(object):
elif dim == 0:
out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
norm = __norm_op(reshape, dim=1, block=block)
norm = __norm_op(reshape, dim=[1], block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
elif dim == len(x.shape) - 1:
out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
norm = __norm_op(reshape, dim=0, block=block)
norm = __norm_op(reshape, dim=[0], block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
else:
perm = list(range(len(x.shape)))
perm[0], perm[dim] = dim, 0
transpose = __transpose_op(x, perm, block=block)
norm = __norm_op(transpose, dim=0, block=block)
__transpose_op(norm, perm, out=out, block=block)
out_shape = [transpose.shape[0]] + [1] * (len(transpose.shape) -
1)
reshape = __reshape_op(
transpose, shape=[transpose.shape[0], -1], block=block)
norm = __norm_op(reshape, dim=[1], block=block)
reshape2 = __reshape_op(norm, shape=out_shape, block=block)
__transpose_op(reshape2, perm, out=out, block=block)
return out
def __weight_normalize(g, v, dim):
......@@ -240,6 +245,13 @@ class LayerHelperBase(object):
dim=attr.dim,
block=self.startup_program.global_block())
# keep g_param shape to be consistent with that in main_program
__reshape_op(
g_param,
g_param_shape,
out=g_param,
block=self.startup_program.global_block())
# Add weight normalization to main_program
g_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
......
......@@ -7300,9 +7300,9 @@ def pad(x, paddings, pad_value=0., name=None):
padded width is specified by :attr:`paddings`.
Specifically, the number of values padded before the contents of :attr:`x`
in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
in dimension :attr:`i` is indicated by :attr:`paddings[2i]`, and the number
of values padded after the contents of :attr:`x` in dimension :attr:`i` is
indicated by :attr:`paddings[i+1]`.
indicated by :attr:`paddings[2i+1]`.
See below for an example.
......
......@@ -180,14 +180,14 @@ class ParamAttr(object):
class WeightNormParamAttr(ParamAttr):
"""
Used for weight Norm. Weight Norm is a reparameterization of the weight vectors
in a neural network that decouples the length of those weight vectors from
in a neural network that decouples the magnitude of those weight vectors from
their direction. Weight Norm has been implemented as discussed in this
paper: `Weight Normalization: A Simple Reparameterization to Accelerate
Training of Deep Neural Networks
<https://arxiv.org/pdf/1602.07868.pdf>`_.
Args:
dim(list): The parameter's name. Default None.
dim(int): Dimension over which to compute the norm. Default None.
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initial this parameter. Default None.
learning_rate(float): The parameter's learning rate. The learning rate when
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册