diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index f6b4348d250a9d99541932fcfefd386584d0d1e5..3a4f93929f51b2181482267735f9c1145622f928 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -39,13 +39,16 @@ __all__ = [
'chunk_eval',
'sequence_conv',
'conv2d',
+ 'conv3d',
'sequence_pool',
'sequence_softmax',
'softmax',
'pool2d',
+ 'pool3d',
'batch_norm',
'beam_search_decode',
'conv2d_transpose',
+ 'conv3d_transpose',
'sequence_expand',
'lstm_unit',
'reduce_sum',
@@ -1385,13 +1388,12 @@ def conv3d(input,
The convolution3D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and
- Output(Output) are in NCHW format. Where N is batch size, C is the number of
- channels, H is the height of the feature, and W is the width of the feature.
- The details of convolution layer, please refer UFLDL's `convolution,
- `_ .
- If bias attribution and activation type are provided, bias is added to the
- output of the convolution, and the corresponding activation function is
- applied to the final result.
+ Output(Output) are in NCDHW format. Where N is batch size C is the number of
+ channels, D is the depth of the feature, H is the height of the feature,
+ and W is the width of the feature. Convlution3D is similar with Convlution2D
+ but adds one dimension(depth). If bias attribution and activation type are
+ provided, bias is added to the output of the convolution, and the
+ corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is:
@@ -1401,8 +1403,8 @@ def conv3d(input,
In the above equation:
- * :math:`X`: Input value, a tensor with NCHW format.
- * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`X`: Input value, a tensor with NCDHW format.
+ * :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
@@ -1433,16 +1435,16 @@ def conv3d(input,
num_filters(int): The number of filter. It is as same as the output
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
- it must contain two integers, (filter_size_D, filter_size_H, filter_size_W).
+ it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
- contain two integers, (stride_D, stride_H, stride_W). Otherwise, the
+ contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
- contain two integers, (padding_D, padding_H, padding_W). Otherwise, the
+ contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
- contain two integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+ contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
@@ -1528,7 +1530,7 @@ def conv3d(input,
'use_mkldnn': use_mkldnn
})
- pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=3)
+ pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
return helper.append_activation(pre_act)
@@ -1720,12 +1722,84 @@ def pool2d(input,
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
- helper = LayerHelper('pool2d', **locals())
+ l_type = 'conv2d'
+
+ helper = LayerHelper(l_type, **locals())
+ dtype = helper.input_dtype()
+ pool_out = helper.create_tmp_variable(dtype)
+
+ helper.append_op(
+ type=l_type,
+ inputs={"X": input},
+ outputs={"Out": pool_out},
+ attrs={
+ "pooling_type": pool_type,
+ "ksize": pool_size,
+ "global_pooling": global_pooling,
+ "strides": pool_stride,
+ "paddings": pool_padding,
+ "use_cudnn": use_cudnn,
+ "ceil_mode": ceil_mode,
+ "use_mkldnn": use_mkldnn
+ })
+
+ return pool_out
+
+
+def pool3d(input,
+ pool_size=-1,
+ pool_type="max",
+ pool_stride=1,
+ pool_padding=0,
+ global_pooling=False,
+ use_cudnn=True,
+ ceil_mode=False,
+ use_mkldnn=False,
+ name=None):
+ """
+ This function adds the operator for pooling in 3-dimensions, using the
+ pooling configurations mentioned in input parameters.
+
+ Args:
+ input (Variable): ${input_comment}
+ pool_size (int): ${ksize_comment}
+ pool_type (str): ${pooling_type_comment}
+ pool_stride (int): stride of the pooling layer.
+ pool_padding (int): padding size.
+ global_pooling (bool): ${global_pooling_comment}
+ use_cudnn (bool): ${use_cudnn_comment}
+ ceil_mode (bool): ${ceil_mode_comment}
+ use_mkldnn (bool): ${use_mkldnn_comment}
+ name (str): A name for this layer(optional). If set None, the layer
+ will be named automatically.
+
+ Returns:
+ Variable: output of pool3d layer.
+ """
+ if pool_type not in ["max", "avg"]:
+ raise ValueError(
+ "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
+ str(pool_type))
+
+ if global_pooling is False and pool_size == -1:
+ raise ValueError(
+ "When the global_pooling is False, pool_size must be passed "
+ "and be a valid value. Received pool_size: " + str(pool_size))
+
+ pool_size = utils.convert_to_list(pool_size, 3, 'pool_size')
+ pool_padding = utils.convert_to_list(pool_padding, 3, 'pool_padding')
+ pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride')
+
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("use_cudnn should be True or False")
+
+ l_type = "pool3d"
+ helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_tmp_variable(dtype)
helper.append_op(
- type="pool2d",
+ type=l_type,
inputs={"X": input},
outputs={"Out": pool_out},
attrs={
@@ -2146,6 +2220,173 @@ def conv2d_transpose(input,
return out
+def conv3d_transpose(input,
+ num_filters,
+ output_size=None,
+ filter_size=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=None,
+ param_attr=None,
+ bias_attr=None,
+ use_cudnn=True,
+ act=None,
+ name=None):
+ """
+ **Convlution3D transpose layer**
+
+ The convolution3D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCDHW format. Where N is batch size, C is the number of channels,
+ D is the depth of the feature, H is the height of the feature, and W
+ is the width of the feature. Parameters(dilations, strides, paddings) are
+ two elements. These two elements represent height and width, respectively.
+ The details of convolution transpose layer, please refer to the following
+ explanation and references `therein `_.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = W \\ast X
+
+ In the above equation:
+
+ * :math:`X`: Input value, a tensor with NCDHW format.
+ * :math:`W`: Filter value, a tensor with MCDHW format.
+ * :math:`\\ast` : Convolution transpose operation.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
+ different.
+
+ Example:
+
+ - Input:
+
+ Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
+
+ Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$
+
+ - Output:
+
+ Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
+
+ Where
+
+ .. math::
+
+ D_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+ H_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+ W_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1
+
+ Args:
+ input(Variable): The input image with [N, C, D, H, W] format.
+ num_filters(int): The number of the filter. It is as same as the output
+ image channel.
+ output_size(int|tuple|None): The output image size. If output size is a
+ tuple, it must contain three integers, (image_D, image_H, image_W). This
+ parameter only works when filter_size is None.
+ filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
+ it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
+ Otherwise, the filter will be a square. None if use output size to
+ calculate filter_size.
+ padding(int|tuple): The padding size. If padding is a tuple, it must
+ contain three integers, (padding_D, padding_H, padding_W). Otherwise, the
+ padding_D = padding_H = padding_W = padding. Default: padding = 0.
+ stride(int|tuple): The stride size. If stride is a tuple, it must
+ contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
+ stride_D = stride_H = stride_W = stride. Default: stride = 1.
+ dilation(int|tuple): The dilation size. If dilation is a tuple, it must
+ contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
+ dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
+ groups(int): The groups number of the Conv3d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups=1
+ param_attr(ParamAttr): The parameters to the Conv3d_transpose Layer.
+ Default: None
+ bias_attr(ParamAttr): Bias parameter for the Conv3d layer. Default: None
+ use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act(str): Activation type. Default: None
+ name(str|None): A name for this layer(optional). If set None, the layer
+ will be named automatically.
+
+ Returns:
+ Variable: The tensor variable storing the convolution transpose result.
+
+ Raises:
+ ValueError: If the shapes of input, filter_size, stride, padding and
+ groups mismatch.
+
+ Examples:
+ .. code-block:: python
+
+ data = fluid.layers.data(
+ name='data', shape=[3, 12, 32, 32], dtype='float32')
+ conv2d_transpose = fluid.layers.conv3d_transpose(
+ input=data, num_filters=2, filter_size=3)
+ """
+ l_type = "conv3d_transpose"
+ helper = LayerHelper(l_type, **locals())
+ if not isinstance(input, Variable):
+ raise TypeError("Input of conv3d_transpose must be Variable")
+ input_channel = input.shape[1]
+
+ padding = utils.convert_to_list(padding, 3, 'padding')
+ stride = utils.convert_to_list(stride, 3, 'stride')
+ dilation = utils.convert_to_list(dilation, 3, 'dilation')
+
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("use_cudnn should be True or False")
+
+ if filter_size is None:
+ if output_size is None:
+ raise ValueError("output_size must be set when filter_size is None")
+ if isinstance(output_size, int):
+ output_size = [output_size, output_size]
+
+ d_in = input.shape[2]
+ h_in = input.shape[3]
+ w_in = input.shape[4]
+
+ filter_size_d = (output_size[0] - (d_in - 1) * stride[0] + 2 *
+ padding[0] - 1) / dilation[0] + 1
+ filter_size_h = (output_size[1] - (h_in - 1) * stride[1] + 2 *
+ padding[1] - 1) / dilation[1] + 1
+ filter_size_w = (output_size[2] - (w_in - 1) * stride[2] + 2 *
+ padding[2] - 1) / dilation[2] + 1
+ filter_size = [filter_size_d, filter_size_h, filter_size_w]
+ else:
+ filter_size = utils.convert_to_list(filter_size, 3,
+ 'conv3d_transpose.filter_size')
+
+ groups = 1 if groups is None else groups
+ filter_shape = [input_channel, num_filters / groups] + filter_size
+ img_filter = helper.create_parameter(
+ dtype=input.dtype, shape=filter_shape, attr=helper.param_attr)
+
+ pre_bias = helper.create_tmp_variable(dtype=input.dtype)
+ helper.append_op(
+ type=l_type,
+ inputs={'Input': [input],
+ 'Filter': [img_filter]},
+ outputs={'Output': pre_bias},
+ attrs={
+ 'strides': stride,
+ 'paddings': padding,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn
+ })
+
+ pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
+ out = helper.append_activation(pre_act)
+ return out
+
+
def sequence_expand(x, y, ref_level=-1, name=None):
"""Sequence Expand Layer. This layer will expand the input variable **x**
according to specified level lod of **y**. Please note that lod level of