From 4da1c4f15dc66e4437942b56caaed5dfe95740ed Mon Sep 17 00:00:00 2001 From: SunGaofeng Date: Mon, 5 Aug 2019 20:54:16 +0800 Subject: [PATCH] fix g_param shape mismatch in WeightNormParamAttr (#18940) * fix g_param shape mismatch in WeightNormParamAttr * add comment to show why insert reshape in startup_program test=develop --- paddle/fluid/API.spec | 4 ++-- python/paddle/fluid/layer_helper_base.py | 20 ++++++++++++++++---- python/paddle/fluid/layers/nn.py | 4 ++-- python/paddle/fluid/param_attr.py | 4 ++-- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 266e26f5ab..6a2e51e0f6 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -167,7 +167,7 @@ paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=N paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', '74498d37dd622ac472cb36887fce09ea')) paddle.fluid.layers.lod_append (ArgSpec(args=['x', 'level'], varargs=None, keywords=None, defaults=None), ('document', '37663c7c179e920838a250ea0e28d909')) paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '73d297256da8954617996958d26ee93d')) -paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f189f8ef61f1c23779e1593b78755c0')) +paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '36b6e58678956585e5b30aa3de123a60')) paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '95aa1972983f30fe9b5a3713e523e20f')) paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '214f1dfbe95a628600bbe99e836319cf')) paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'ceedc8c22752c623d6e1ea2e8df0f43f')) @@ -994,7 +994,7 @@ paddle.fluid.CUDAPinnedPlace ('paddle.fluid.core_avx.CUDAPinnedPlace', ('documen paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None paddle.fluid.ParamAttr ('paddle.fluid.param_attr.ParamAttr', ('document', 'fa47fa251f727c4a4f638d61e3c7c141')) paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', '48ab4f49c7eeeade5958b731b6a96aa0')) +paddle.fluid.WeightNormParamAttr ('paddle.fluid.param_attr.WeightNormParamAttr', ('document', 'b5ae1698ea72d5a9428000b916a67379')) paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DataFeeder ('paddle.fluid.data_feeder.DataFeeder', ('document', 'a39802654f20692ad49c340cef7c6556')) paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index cbfd4f45f9..5e4eac6b5c 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -177,19 +177,24 @@ class LayerHelperBase(object): elif dim == 0: out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1) reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block) - norm = __norm_op(reshape, dim=1, block=block) + norm = __norm_op(reshape, dim=[1], block=block) __reshape_op(norm, out=out, shape=out_shape, block=block) elif dim == len(x.shape) - 1: out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]] reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block) - norm = __norm_op(reshape, dim=0, block=block) + norm = __norm_op(reshape, dim=[0], block=block) __reshape_op(norm, out=out, shape=out_shape, block=block) else: perm = list(range(len(x.shape))) perm[0], perm[dim] = dim, 0 transpose = __transpose_op(x, perm, block=block) - norm = __norm_op(transpose, dim=0, block=block) - __transpose_op(norm, perm, out=out, block=block) + out_shape = [transpose.shape[0]] + [1] * (len(transpose.shape) - + 1) + reshape = __reshape_op( + transpose, shape=[transpose.shape[0], -1], block=block) + norm = __norm_op(reshape, dim=[1], block=block) + reshape2 = __reshape_op(norm, shape=out_shape, block=block) + __transpose_op(reshape2, perm, out=out, block=block) return out def __weight_normalize(g, v, dim): @@ -240,6 +245,13 @@ class LayerHelperBase(object): dim=attr.dim, block=self.startup_program.global_block()) + # keep g_param shape to be consistent with that in main_program + __reshape_op( + g_param, + g_param_shape, + out=g_param, + block=self.startup_program.global_block()) + # Add weight normalization to main_program g_param = self.main_program.global_block().create_parameter( dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 7456fccdb5..e16a250bde 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -7300,9 +7300,9 @@ def pad(x, paddings, pad_value=0., name=None): padded width is specified by :attr:`paddings`. Specifically, the number of values padded before the contents of :attr:`x` - in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number + in dimension :attr:`i` is indicated by :attr:`paddings[2i]`, and the number of values padded after the contents of :attr:`x` in dimension :attr:`i` is - indicated by :attr:`paddings[i+1]`. + indicated by :attr:`paddings[2i+1]`. See below for an example. diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 1778f4b55e..2d70495f9c 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -180,14 +180,14 @@ class ParamAttr(object): class WeightNormParamAttr(ParamAttr): """ Used for weight Norm. Weight Norm is a reparameterization of the weight vectors - in a neural network that decouples the length of those weight vectors from + in a neural network that decouples the magnitude of those weight vectors from their direction. Weight Norm has been implemented as discussed in this paper: `Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks `_. Args: - dim(list): The parameter's name. Default None. + dim(int): Dimension over which to compute the norm. Default None. name(str): The parameter's name. Default None. initializer(Initializer): The method to initial this parameter. Default None. learning_rate(float): The parameter's learning rate. The learning rate when -- GitLab