From ee21f2f6019108ab7ab011455a082a0d944fccc2 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Tue, 23 Jan 2018 14:43:52 +0800
Subject: [PATCH] Change default value of drop_rate in img_conv_group to 0

---
 python/paddle/v2/fluid/nets.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py
index ee6f70b899..6cc372bcb6 100644
--- a/python/paddle/v2/fluid/nets.py
+++ b/python/paddle/v2/fluid/nets.py
@@ -52,7 +52,7 @@ def img_conv_group(input,
                    conv_act=None,
                    param_attr=None,
                    conv_with_batchnorm=False,
-                   conv_batchnorm_drop_rate=None,
+                   conv_batchnorm_drop_rate=0,
                    pool_stride=1,
                    pool_type=None):
     """
@@ -120,21 +120,21 @@ def sequence_conv_pool(input,
 
 def glu(input, dim=-1):
     """
-    The gated linear unit composed by split, sigmoid activation and elementwise 
-    multiplication. Specifically, Split the input into two equal sized parts 
-    :math:`a` and :math:`b` along the given dimension and then compute as 
+    The gated linear unit composed by split, sigmoid activation and elementwise
+    multiplication. Specifically, Split the input into two equal sized parts
+    :math:`a` and :math:`b` along the given dimension and then compute as
     following:
 
     .. math::
 
         {GLU}(a, b)= a \otimes \sigma(b)
 
-    Refer to `Language Modeling with Gated Convolutional Networks 
+    Refer to `Language Modeling with Gated Convolutional Networks
     <https://arxiv.org/abs/1612.08083>`_.
-    
+
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        dim (int): The dimension along which to split. If :math:`dim < 0`, the 
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the
             dimension to split along is :math:`rank(input) + dim`.
 
     Returns:
@@ -157,24 +157,24 @@ def dot_product_attention(querys, keys, values):
     """
     The dot-product attention.
 
-    Attention mechanism can be seen as mapping a query and a set of key-value 
-    pairs to an output. The output is computed as a weighted sum of the values, 
-    where the weight assigned to each value is computed by a compatibility 
+    Attention mechanism can be seen as mapping a query and a set of key-value
+    pairs to an output. The output is computed as a weighted sum of the values,
+    where the weight assigned to each value is computed by a compatibility
     function (dot-product here) of the query with the corresponding key.
-    
-    The dot-product attention can be implemented through (batch) matrix 
+
+    The dot-product attention can be implemented through (batch) matrix
     multipication as follows:
 
     .. math::
 
         Attention(Q, K, V)= softmax(QK^\mathrm{T})V
 
-    Refer to `Attention Is All You Need 
+    Refer to `Attention Is All You Need
     <https://arxiv.org/abs/1706.03762>`_.
 
-    Note that batch data containing sequences with different lengths is not 
+    Note that batch data containing sequences with different lengths is not
     supported by this because of the (batch) matrix multipication.
-    
+
     Args:
         query (Variable): The input variable which is a Tensor or LoDTensor.
         key (Variable): The input variable which is a Tensor or LoDTensor.
-- 
GitLab
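
Note on the default-value change: with conv_batchnorm_drop_rate=0, the dropout applied after each batch norm keeps every activation, so it is a no-op unless the caller opts in, and no None special-casing is needed. A minimal NumPy sketch of the usual inverted-dropout semantics behind a drop rate, assuming nothing about Fluid's internal implementation (np_dropout is a hypothetical helper, not code from this patch):

    import numpy as np

    def np_dropout(x, drop_rate=0, training=True):
        # drop_rate == 0 keeps every activation, so the new default
        # behaves as an identity without any None checks.
        if not training or drop_rate == 0:
            return x
        keep = (np.random.rand(*x.shape) >= drop_rate).astype(x.dtype)
        return x * keep / (1.0 - drop_rate)  # inverted-dropout rescaling

    x = np.ones((2, 3), dtype="float32")
    assert (np_dropout(x, drop_rate=0) == x).all()  # default is identity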
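
For reference, a minimal NumPy sketch of the formula in the glu docstring, {GLU}(a, b) = a ⊗ σ(b): split the input into two equal halves along the given dimension and gate one half by the sigmoid of the other (np_glu is a hypothetical name, not Fluid's implementation):

    import numpy as np

    def np_glu(x, dim=-1):
        # Split x into equal halves a, b along `dim`, then a * sigmoid(b).
        a, b = np.split(x, 2, axis=dim)
        return a * (1.0 / (1.0 + np.exp(-b)))

    x = np.random.randn(4, 6).astype("float32")
    out = np_glu(x)  # shape (4, 3): half the input size along dim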
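
Likewise, a minimal NumPy sketch of Attention(Q, K, V) = softmax(QK^T)V via batched matrix multiplication, as the dot_product_attention docstring describes; like the docstring notes, it assumes all sequences in the batch have the same length (no padding mask, and no scaling factor, which the formula above also omits). np_dot_product_attention is a hypothetical name:

    import numpy as np

    def np_dot_product_attention(q, k, v):
        # softmax over the key axis of the batched score matrix Q K^T.
        scores = q @ k.transpose(0, 2, 1)                    # (batch, q_len, k_len)
        scores = scores - scores.max(axis=-1, keepdims=True) # numerical stability
        weights = np.exp(scores)
        weights = weights / weights.sum(axis=-1, keepdims=True)
        return weights @ v                                   # (batch, q_len, v_dim)

    q = np.random.randn(2, 5, 8)
    k = np.random.randn(2, 7, 8)
    v = np.random.randn(2, 7, 8)
    out = np_dot_product_attention(q, k, v)  # shape (2, 5, 8)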