Merge pull request #5951 from chengduoZH/fix_conv_doc

fix conv and conv_trans op doc

Merge pull request #5951 from chengduoZH/fix_conv_doc
fix conv and conv_trans op doc
5a3d1362 · chengduo · GitHub · 1b6dcc2f · c339e1b7 · 5a3d1362
5 changed files
--- a/paddle/operators/conv_op.cc
+++ b/paddle/operators/conv_op.cc
@@ -97,7 +97,7 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
      .SetDefault({0, 0});
  AddAttr<int>(
      "groups",
-      "(int default:1), the group size of convolution operator. "
+      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
@@ -112,23 +112,29 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
 Convolution Operator.
 The convolution operation calculates the output based on the input, filter
-and strides, paddings, groups, dilations parameters. The size of each dimension of the
+and strides, paddings, dilations, groups parameters. The size of each dimension of the
 parameters is checked in the infer-shape.
-Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
+Input(Input) and Output(Output) are in NCHW format, where N is batch
 size, C is the number of channels, H is the height of the feature, and W is
-the width of the feature. Parameters(ksize, strides, paddings, dilations) are two elements.
+the width of the feature.
-These two elements represent height and width, respectively.
+Filter(Input) is in MCHW format, where M is the number of output image channels, C is
+the number of input image channels, H is the height of the filter, and W
+is the width of the filter.
+Parameters(strides, paddings, dilations) are two elements. These two elements represent
+height and width, respectively.
 The input(X) size and output(Out) size may be different.
 Example:
  Input:
-       Input shape: (N, C_in, H_in, W_in)
+       Input shape: $(N, C_{in}, H_{in}, W_{in})$
-       Filter shape: (C_out, C_in, H_f, W_f)
+       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
  Output:
-       Output shape: (N, C_out, H_out, W_out)
+       Output shape: $(N, C_{out}, H_{out}, W_{out})$
-  where
+  Where
-       H_out = (H_in + 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)) / strides[0] + 1;
+$$
-       W_out = (W_in + 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)) / strides[1] + 1;
+       H_{out}= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
+       W_{out}= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
+$$
 )DOC");
 }
@@ -165,7 +171,7 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
      .SetDefault({0, 0, 0});
  AddAttr<int>(
      "groups",
-      "(int default:1), the group size of convolution operator. "
+      "(int default:1), the groups number of the convolution operator. "
      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
      "when group=2, the first half of the filters is only connected to the "
      "first half of the input channels, while the second half of the filters "
@@ -174,32 +180,37 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
  AddAttr<std::vector<int>>("dilations",
                            "(vector<int> default:{1, 1, 1}), the "
                            "dilations(d_dilation, h_dilation, w_dilation) of "
-                            "convolution operator. Currently, conv3d doesn't "
+                            "convolution operator.")
-                            "support dilation.")
      .SetDefault({1, 1, 1});
  AddComment(R"DOC(
 Convolution3D Operator.
 The convolution operation calculates the output based on the input, filter
-and strides, paddings, groups parameters. The size of each dimension of the
+and strides, paddings, dilations, groups parameters. The size of each dimension of the
 parameters is checked in the infer-shape.
-Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch
+Input(Input) and output(Output) are in NCDHW format, where N is batch
 size, C is the number of channels,D is the depth of the feature, H is the height of
-the feature, and W is the width of the feature. Parameters(ksize, strides, paddings)
+the feature, and W is the width of the feature.
-are three elements. These three elements represent depth, height and width, respectively.
+Filter(Input) is in MCDHW format, where M is the number of output image channels,
+C is the number of input image channels, D is the depth of the filter,
+H is the height of the filter, and W is the width of the filter.
+Parameters(strides, paddings, dilations) are three elements. These three elements
+represent depth, height and width, respectively.
 The input(X) size and output(Out) size may be different.
 Example:
  Input:
-       Input shape: (N, C_in, D_in, H_in, W_in)
+       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
-       Filter shape: (C_out, C_in, D_f, H_f, W_f)
+       Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$
  Output:
-       Output shape: (N, C_out, D_out, H_out, W_out)
+       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
-  where
+  Where
-       D_out = (D_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1;
+  $$
-       H_out = (H_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1;
+       D_{out}= \frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\
-       W_out = (W_in - filter_size[2] + 2 * paddings[2]) / strides[2] + 1;
+       H_{out}= \frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\
+       W_{out}= \frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1
+  $$
 )DOC");
 }

--- a/paddle/operators/conv_transpose_op.cc
+++ b/paddle/operators/conv_transpose_op.cc
@@ -39,7 +39,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
                 "ConvTransposeOp input dimension and strides dimension should "
                 "be consistent.");
  PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
-                    "ConvTransposeOp paddings dimension and Conv strides "
+                    "ConvTransposeOp paddings dimension and strides "
                    "dimension should be the same.");
  PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
                    "In ConvTransposeOp, The input channel should be the same "
@@ -62,13 +62,14 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
      "The format of input tensor is NCHW. Where N is batch size, C is the "
      "number of input channels, H is the height of the feature, and "
      "W is the width of the feature.");
-  AddInput("Filter",
+  AddInput(
-           "(Tensor) The filter tensor of convolution transpose operator. "
+      "Filter",
-           "The format of the filter tensor is CMHW, where C is the number of "
+      "(Tensor) The filter tensor of convolution transpose operator. "
-           "output image channels, M is the number of input image channels, "
+      "The format of the filter tensor is MCHW, where M is the number of "
-           "H is the height of the filter, and W is the width of the filter. "
+      "input feature channels, C is the number of "
-           "We enforce groups number == 1 and padding == 0 in "
+      "output feature channels, "
-           "the convolution transpose scenario.");
+      "H is the height of the filter, and W is the width of the filter. "
+      "We enforce groups number == 1 in the convolution transpose scenario.");
  AddOutput("Output",
            "(Tensor) The output tensor of convolution transpose operator. "
            "The format of output tensor is also NCHW.");
@@ -88,21 +89,26 @@ Convolution2D Transpose Operator.
 The convolution transpose operation calculates the output based on the input, filter
 and strides, paddings, groups parameters. The size of each dimension of the
 parameters is checked in the infer-shape.
+Input(Input) and output(Output) are in NCHW format, where N is batch size, C is the
-Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
+number of channels, H is the height of the feature, and W is the width of the feature.
-size, C is the number of channels, H is the height of the feature, and 
+Filter(Input) is in MCHW format. Where M is the number of input feature channels,
-W is the width of the feature. Parameters(ksize, strides, paddings) are two elements.
+C is the number of output feature channels, H is the height of the filter,
-These two elements represent height and width, respectively.
+and W is the width of the filter.
+Parameters(strides, paddings) are two elements. These two elements represent height
+and width, respectively.
 The input(X) size and output(Out) size may be different.
 Example:
  Input:
-       Input shape: (N, C_in, H_in, W_in)
+       Input shape: $(N, C_{in}, H_{in}, W_{in})$
-       Filter shape: (C_in, C_out, H_f, W_f)
+       Filter shape: $(C_{in}, C_{out}, H_f, W_f)$
  Output:
-       Output shape: (N, C_out, H_out, W_out)
+       Output shape: $(N, C_{out}, H_{out}, W_{out})$
-  where
+  Where
-       H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + H_f;
+  $$
-       W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + W_f;
+       H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + H_f \\
+       W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + W_f
+  $$
 )DOC");
 }
@@ -117,8 +123,9 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
           "W is the width of the feature.");
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution transpose operator."
-           "The format of the filter tensor is CMDHW, where C is the number of "
+           "The format of the filter tensor is MCDHW, where M is the number of "
-           "output image channels, M is the number of input image channels, D "
+           "input feature channels, C is the number of "
+           "output feature channels, D "
           "is the depth of the filter, H is the height of the filter, and "
           "W is the width of the filter."
           "We enforce groups number == 1 and padding == 0 in "
@@ -144,23 +151,28 @@ Convolution3D Transpose Operator.
 The convolution transpose operation calculates the output based on the input, filter
 and strides, paddings, groups parameters. The size of each dimension of the
 parameters is checked in the infer-shape.
+Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the
-Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch
+number of channels, D is the depth of the feature, H is the height of the feature,
-size, C is the number of channels, D is the depth of the feature, 
+and W is the width of the feature.
-H is the height of the feature, and W is the width of the feature. 
+Filter(Input) is in MCDHW format. Where M is the number of input feature channels,
-Parameters(ksize, strides, paddings) are three elements.
+C is the number of output feature channels, D is the depth of the filter, H is the
-These three elements represent depth, height and width, respectively.
+height of the filter, and W is the width of the filter.
+Parameters(strides, paddings) are three elements. These three elements represent
+depth, height and width, respectively.
 The input(X) size and output(Out) size may be different.
-Example:
+Example:   
  Input:
-       Input shape: (N, C_in, D_in, H_in, W_in)
+       Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$
-       Filter shape: (C_in, C_out, D_f, H_f, W_f)
+       Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$
  Output:
-       Output shape: (N, C_out, D_out, H_out, W_out)
+       Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$
-  where
+  Where
-       D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + D_f;
+  $$
-       H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + H_f;
+       D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + D_f \\
-       W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + W_f;
+       H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + H_f \\
+       W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + W_f
+  $$
 )DOC");
 }

--- a/paddle/operators/conv_transpose_op.h
+++ b/paddle/operators/conv_transpose_op.h
@@ -63,7 +63,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    // TODO(Zhuoyuan): Paddings can be added in future.
    // groups will alway be disabled in conv2dtranspose.
    const int batch_size = static_cast<int>(input->dims()[0]);

--- a/paddle/operators/pool_op.cc
+++ b/paddle/operators/pool_op.cc
@@ -105,7 +105,7 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto,
  // TypedAttrChecker don't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
-      "(vector<int>, defalut {0,0}), paddings(height, width) of pooling "
+      "(vector<int>, default {0,0}), paddings(height, width) of pooling "
      "operator."
      "If global_pooling = true, paddings and ksize will be ignored.")
      .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
@@ -122,15 +122,15 @@ Parameters(ksize, strides, paddings) are two elements.
 These two elements represent height and width, respectively.
 The input(X) size and output(Out) size may be different.
-Example:
+Example:   
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
-  where 
+  Where
       $$ 
-       H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
+       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-       W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
+       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
 )DOC");
@@ -177,7 +177,7 @@ Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto,
                               // TypedAttrChecker don't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
-      "(vector<int>, defalut {0,0,0}), paddings(depth, height, "
+      "(vector<int>, default {0,0,0}), paddings(depth, height, "
      "width) of pooling operator. "
      "If global_pooling = true, ksize and paddings will be ignored.")
      .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
@@ -199,12 +199,12 @@ Example:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
-  where
+  Where
-       $$
+  $$
-       D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
+       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-       H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
+       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
-       W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
+       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
-       $$
+  $$
 )DOC");
 }

--- a/paddle/operators/pool_with_index_op.cc
+++ b/paddle/operators/pool_with_index_op.cc
@@ -142,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
    // TypedAttrChecker don't support vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
-        "(vector<int>, defalut:{0, 0}), paddings(height, width) of pooling "
+        "(vector<int>, default:{0, 0}), paddings(height, width) of pooling "
        "operator. "
        "If global_pooling = true, paddings and will be ignored.")
        .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
@@ -166,10 +166,10 @@ Example:
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
-  where
+  Where
       $$
-       H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
+       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-       W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
+       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
 )DOC");
@@ -220,7 +220,7 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
    // TypedAttrChecker don't support vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
-        "(vector, defalut {0,0,0}), paddings(depth, "
+        "(vector, default {0,0,0}), paddings(depth, "
        "height, width) of pooling operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
@@ -244,11 +244,11 @@ Example:
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
-  where
+  Where
       $$
-       D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
+       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
-       H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
+       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
-       W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
+       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
       $$
 )DOC");