From 9b1a17a8356ce2c9180500eabc4fa6e8b2ac7849 Mon Sep 17 00:00:00 2001 From: chengduo <zhaochengduo@baidu.com> Date: Tue, 23 Jan 2018 10:20:57 +0800 Subject: [PATCH] Refine conv2d_transpose layer doc (#6920) * refine conv2d_transpose layer doc * fix conv2d_transpose doc * fix doc --- paddle/operators/conv_transpose_op.cc | 10 +- paddle/operators/conv_transpose_op.h | 9 +- python/paddle/v2/fluid/layers/nn.py | 175 ++++++++++++++++---------- 3 files changed, 121 insertions(+), 73 deletions(-) diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index a2382a7e42..089290a506 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -160,8 +160,8 @@ Example: Output shape: $(N, C_{out}, H_{out}, W_{out})$ Where $$ - H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + H_f \\ - W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + W_f + H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\ + W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 $$ )DOC"); } @@ -249,9 +249,9 @@ Example: Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$ Where $$ - D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + D_f \\ - H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + H_f \\ - W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + W_f + D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\ + H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\ + W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 $$ )DOC"); } diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index a42ade41b1..8c0d57afcd 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -141,9 +141,9 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> { if (data_dim == 2U) { // col2im: col_matrix -> dy // from (c * k_h * k_w, h * w) to (c, o_h, o_w) - col2im(dev_ctx, col, std::vector<int>{dilations[0], dilations[1]}, - strides, std::vector<int>{paddings[0], paddings[1], paddings[0], - paddings[1]}, + col2im(dev_ctx, col, dilations, strides, + std::vector<int>{paddings[0], paddings[1], paddings[0], + paddings[1]}, &output_batch); } else if (data_dim == 3U) { // col2vol: col_matrix -> dy @@ -247,8 +247,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> { if (data_dim == 2U) { // im2col: dy -> col matrix // from (c, o_h, o_w) to (c * k_h * k_w, h * w) - im2col(dev_ctx, output_grad_batch, - std::vector<int>{dilations[0], dilations[1]}, strides, + im2col(dev_ctx, output_grad_batch, dilations, strides, std::vector<int>{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index a01ccfa635..0721198816 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -790,8 +790,8 @@ def conv2d(input, <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ . If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. - For each input :math:`X`, the equation is: + For each input :math:`X`, the equation is: .. math:: @@ -799,51 +799,54 @@ def conv2d(input, In the above equation: - * :math:`X`: Input value, a tensor with NCHW format. - * :math:`W`: Filter value, a tensor with MCHW format. - * :math:`\\ast`: Convolution operation. - * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. - * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + * :math:`X`: Input value, a tensor with NCHW format. + * :math:`W`: Filter value, a tensor with MCHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. Example: - Input: - Input shape: $(N, C_{in}, H_{in}, W_{in})$ + - Input: + + Input shape: $(N, C_{in}, H_{in}, W_{in})$ - Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ + Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ + + - Output: + Output shape: $(N, C_{out}, H_{out}, W_{out})$ - Output: - Output shape: $(N, C_{out}, H_{out}, W_{out})$ Where - .. math:: + + .. math:: H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of filter. It is as same as the output - image channel. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. - groups(int): The groups number of the Conv2d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. Default: groups=1 - param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None - bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act(str): Activation type. Default: None + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + groups(int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act(str): Activation type. Default: None Returns: Variable: The tensor variable storing the convolution and \ @@ -858,7 +861,6 @@ def conv2d(input, data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu") """ - if stride is None: stride = [1, 1] helper = LayerHelper('conv2d', **locals()) @@ -1212,38 +1214,85 @@ def conv2d_transpose(input, use_cudnn=True, name=None): """ - The transpose of conv2d layer. + **Convlution2D transpose layer** + + The convolution2D transpose layer calculates the output based on the input, + filter, and dilations, strides, paddings. Input(Input) and output(Output) + are in NCHW format. Where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + Parameters(dilations, strides, paddings) are two elements. These two elements + represent height and width, respectively. The details of convolution transpose + layer, please refer to the following explanation and references `therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = W \\ast X + + In the above equation: + + * :math:`X`: Input value, a tensor with NCHW format. + * :math:`W`: Filter value, a tensor with MCHW format. + * :math:`\\ast` : Convolution transpose operation. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: - This layer is also known as deconvolution layer. + - Input: + + Input shape: $(N, C_{in}, H_{in}, W_{in})$ + + Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ + + - Output: + + Output shape: $(N, C_{out}, H_{out}, W_{out})$ + + Where + + .. math:: + + H_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\ + W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of filter. It is as same as the output - image channel. - output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). This - parameter only works when filter_size is None. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. None if use output size to - calculate filter_size - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. - dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. - param_attr: Parameter Attribute. - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of the filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). This + parameter only works when filter_size is None. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. Default: None + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: - Variable: Output image. + Variable: The tensor variable storing the convolution transpose result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3) """ helper = LayerHelper("conv2d_transpose", **locals()) if not isinstance(input, Variable): -- GitLab