提交 3ab32532 编写于 作者: C chengduoZH

Add conv3d Python API

上级 431491a2
......@@ -1305,8 +1305,6 @@ def conv2d(input,
conv2d = fluid.layers.conv2d(
input=data, num_filters=2, filter_size=3, act="relu")
"""
if stride is None:
stride = [1, 1]
num_channels = input.shape[1]
......@@ -1369,6 +1367,172 @@ def conv2d(input,
return helper.append_activation(pre_act)
def conv3d(input,
num_filters,
filter_size,
stride=1,
padding=0,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
use_mkldnn=False,
act=None,
name=None):
"""
**Convlution3D Layer**
The convolution3D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and
Output(Output) are in NCHW format. Where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
The details of convolution layer, please refer UFLDL's `convolution,
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
Args:
input (Variable): The input image with [N, C, D, H, W] format.
num_filters(int): The number of filter. It is as same as the output
image channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_D, filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
stride (int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. Default: stride = 1.
padding (int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_D, padding_H, padding_W). Otherwise, the
padding_D = padding_H = padding_W = padding. Default: padding = 0.
dilation (int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1.
groups (int): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameters to the Conv3d Layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not.
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The tensor variable storing the convolution and \
non-linearity activation result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(
name='data', shape=[3, 12, 32, 32], dtype='float32')
conv2d = fluid.layers.conv3d(
input=data, num_filters=2, filter_size=3, act="relu")
"""
l_type = 'conv3d'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
num_channels = input.shape[1]
if groups is None:
num_filter_channels = num_channels
else:
if num_channels % groups != 0:
raise ValueError("num_channels must be divisible by groups.")
num_filter_channels = num_channels / groups
filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
stride = utils.convert_to_list(stride, 3, 'stride')
padding = utils.convert_to_list(padding, 3, 'padding')
dilation = utils.convert_to_list(dilation, 3, 'dilation')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
input_shape = input.shape
filter_shape = [num_filters, num_filter_channels] + filter_size
def _get_default_param_initializer():
std = (2.0 / (filter_size[0]**3 * num_channels))**0.5
return Normal(0.0, std, 0)
filter_param = helper.create_parameter(
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
default_initializer=_get_default_param_initializer())
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type=l_type,
inputs={
'Input': input,
'Filter': filter_param,
},
outputs={"Output": pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn,
'use_mkldnn': use_mkldnn
})
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=3)
return helper.append_activation(pre_act)
def sequence_pool(input, pool_type):
"""
This function add the operator for sequence pooling.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册