未验证 提交 346689c6 编写于 作者: L LielinJiang 提交者: GitHub

Register conv_transpose Op version for compatible Op upgrades (#26745)

* fix bug

* add version check

* fix docs, test=document_fix

* fix formula, test=document_fix
上级 8bcb1f29
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#ifdef PADDLE_WITH_MKLDNN
......@@ -567,3 +568,14 @@ REGISTER_OP_CPU_KERNEL(
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
double>);
REGISTER_OP_VERSION(conv_transpose)
.AddCheckpoint(
R"ROC(
Upgrade convtranspose add a new attribute [output_padding].
)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"output_padding",
"In order to add additional size to one side of each dimension "
"in the output",
{}));
......@@ -807,10 +807,10 @@ def conv_transpose2d(x,
stride=1,
padding=0,
output_padding=0,
groups=1,
dilation=1,
data_format='NCHW',
groups=1,
output_size=None,
data_format='NCHW',
name=None):
"""
......@@ -883,28 +883,27 @@ def conv_transpose2d(x,
stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
`dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
If `padding` is a tuple or list, it could be in three forms:
`[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and
when `data_format` is `'NCHW'`,
`padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `'NHWC'`, `padding` can be in the form
padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups = 1.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
output_size(int|tuple|list, optional): The output image size. If output size is a
tuple, it must contain two integers, (image_height, image_width). None if use
filter_size, padding, and stride to calculate output_size.
......@@ -950,7 +949,7 @@ def conv_transpose2d(x,
paddle.disable_static()
x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(w)
y_var = F.conv2d_transpose(x_var, w_var)
y_var = F.conv_transpose2d(x_var, w_var)
y_np = y_var.numpy()
print(y_np.shape)
......@@ -1160,19 +1159,17 @@ def conv3d(x,
Examples:
.. code-block:: python
from paddle import fluid
import paddle.nn.functional as F
import paddle.fluid.dygraph as dg
import numpy as np
import paddle
import paddle.nn.functional as F
x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32)
place = fluid.CPUPlace()
with dg.guard(place):
x_var = dg.to_variable(x)
w_var = dg.to_variable(w)
y_var = F.conv3d(x_var, w_var, act="relu")
paddle.disable_static()
x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(w)
y_var = F.conv3d(x_var, w_var)
y_np = y_var.numpy()
print(y_np.shape)
......@@ -1260,8 +1257,8 @@ def conv_transpose3d(x,
output_padding=0,
groups=1,
dilation=1,
data_format='NCDHW',
output_size=None,
data_format='NCDHW',
name=None):
"""
The convolution3d transpose layer calculates the output based on the input,
......@@ -1338,37 +1335,37 @@ def conv_transpose3d(x,
If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively
adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string,
either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding`
is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `'NCDHW'`, `padding` can be in the form
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `'NDHWC'`, `padding` can be in the form
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
Default: dilation = 1.
output_size(int|list|tuple, optional): The output image size. If output size is a
tuple, it must contain three integers, (image_depth, image_height, image_width). This
parameter only works when filter_size is None. If output_size and filter_size are
specified at the same time, They should follow the formula above. Default: None.
Output_size and filter_size should not be None at the same time.
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
......
......@@ -784,30 +784,30 @@ def kl_div(input, label, reduction='mean', name=None):
import numpy as np
import paddle.nn.functional as F
paddle.enable_imperative()
paddle.disable_static()
shape = (5, 20)
input = np.random.uniform(-10, 10, shape).astype('float32')
target = np.random.uniform(-10, 10, shape).astype('float32')
# 'batchmean' reduction, loss shape will be [N]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='batchmean')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='batchmean')
# shape=[5]
# 'mean' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='mean')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='mean')
# shape=[1]
# 'sum' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='sum')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='sum')
# shape=[1]
# 'none' reduction, loss shape is same with input shape
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='none')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='none')
# shape=[5, 20]
"""
......
......@@ -99,7 +99,8 @@ class _ConvNd(layers.Layer):
raise ValueError("in_channels must be divisible by groups.")
if padding_mode in {'reflect', 'replicate', 'circular'}:
_paired_padding = utils.convert_to_list(padding, 2, 'padding')
_paired_padding = utils.convert_to_list(padding, dims,
'padding')
self._reversed_padding_repeated_twice = _reverse_repeat_list(
_paired_padding, 2)
......@@ -318,62 +319,80 @@ class Conv2d(_ConvNd):
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \\sigma (W \\ast X + b)
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by convolution.
kernel_size (int|list|tuple): The size of convolution kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain three integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding`on both sides
2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spartial dimensions, which contains the amount of padding on each side for each spartial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` .
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1.
groups (int, optional): The groups number of the Conv2d Layer. According to grouped
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
groups(int, optional): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
connected to the second half of the input channels. The default value is 1.
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d.
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
is not set, the bias is initialized zero. The default value is None.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel_size[0] - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel_size[1] - 1) + 1))}{strides[1]} + 1
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -646,35 +665,29 @@ class ConvTranspose2d(_ConvNd):
The details of convolution transpose layer, please refer to the following explanation and references
`conv2dtranspose <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_ .
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution.
kernel_size(int|list|uple): The kernel size. If kernel_size is a tuple,
it must contain two integers, (kernel_size_H, kernel_size_W).
Otherwise, the kernel will be a square.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` on both sides
......@@ -682,9 +695,8 @@ class ConvTranspose2d(_ConvNd):
4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1.
......@@ -694,29 +706,46 @@ class ConvTranspose2d(_ConvNd):
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
weight_attr(ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose.
bias_attr(ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] )
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -791,66 +820,86 @@ class Conv3d(_ConvNd):
provided, bias is added to the output of the convolution, and the
corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size (int|list|tuple, optional): The size of the convolving kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. The default value is 1.
padding (int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spartial dimensions, which contains the amount of padding on each side for each spartial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
groups (int, optional): The groups number of the Conv3d Layer. According to grouped
groups(int, optional): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. The default value is None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (kernel\_size[2] - 1) + 1))}{strides[2]} + 1
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
import numpy as np
import paddle
......@@ -936,17 +985,22 @@ class ConvTranspose3d(_ConvNd):
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
**Note**:
The conv_transpose3d can be seen as the backward of the conv3d. For conv3d,
when stride > 1, conv3d maps multiple input shape to the same output shape,
so for conv_transpose3d, when stride > 1, input shape maps multiple output shape.
......@@ -957,6 +1011,7 @@ class ConvTranspose3d(_ConvNd):
and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must
between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`,
conv_transpose3d can compute the kernel size automatically.
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution.
......@@ -985,11 +1040,11 @@ class ConvTranspose3d(_ConvNd):
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
The default value is 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
......@@ -999,24 +1054,38 @@ class ConvTranspose3d(_ConvNd):
filter_size, padding, and stride to calculate output_size.
if output_size and filter_size are specified at the same time, They
should follow the formula above. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\
H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel_size[2] - 1) + 1 \\\\
D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel\_size[2] - 1) + 1
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -1024,7 +1093,7 @@ class ConvTranspose3d(_ConvNd):
paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.Conv3DTranspose(4, 6, (3, 3, 3))
conv = nn.ConvTranspose3d(4, 6, (3, 3, 3))
y_var = conv(x_var)
y_np = y_var.numpy()
print(y_np.shape)
......
......@@ -634,9 +634,12 @@ class KLDivLoss(fluid.dygraph.Layer):
Default is ``'mean'``.
Shape:
- input: (N, *) where * means, any number of additional dimensions.
- label: (N, *), same shape as input
- output: tensor with shape: (1) by default.
- input (Tensor): (N, *), where * means, any number of additional dimensions.
- label (Tensor): (N, *), same shape as input.
- output (Tensor): tensor with shape: [1] by default.
Examples:
......@@ -646,7 +649,7 @@ class KLDivLoss(fluid.dygraph.Layer):
import numpy as np
import paddle.nn as nn
paddle.enable_imperative()
paddle.disable_static()
shape = (5, 20)
x = np.random.uniform(-10, 10, shape).astype('float32')
......@@ -654,26 +657,26 @@ class KLDivLoss(fluid.dygraph.Layer):
# 'batchmean' reduction, loss shape will be [N]
kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[5]
# 'mean' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='mean')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[1]
# 'sum' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='sum')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[1]
# 'none' reduction, loss shape is same with X shape
kldiv_criterion = nn.KLDivLoss(reduction='none')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[5, 20]
"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册