From 9abc0e04c1974ad16bf27d783dcb6b53da315a73 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 27 Nov 2017 19:04:07 +0800 Subject: [PATCH] fix conv and conv_trans op doc --- paddle/operators/conv_op.cc | 61 ++++++++++------- paddle/operators/conv_transpose_op.cc | 90 +++++++++++++++----------- paddle/operators/conv_transpose_op.h | 1 - paddle/operators/pool_op.cc | 24 +++---- paddle/operators/pool_with_index_op.cc | 18 +++--- 5 files changed, 108 insertions(+), 86 deletions(-) diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index 7a36a9b21a..462e6d9cbc 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -97,7 +97,7 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, .SetDefault({0, 0}); AddAttr( "groups", - "(int default:1), the group size of convolution operator. " + "(int default:1), the groups number of the convolution operator. " "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " "when group=2, the first half of the filters is only connected to the " "first half of the input channels, while the second half of the filters " @@ -112,23 +112,29 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, Convolution Operator. The convolution operation calculates the output based on the input, filter -and strides, paddings, groups, dilations parameters. The size of each dimension of the +and strides, paddings, dilations, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. -Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch +Input(Input) and Output(Output) are in NCHW format. Where N is batch size, C is the number of channels, H is the height of the feature, and W is -the width of the feature. Parameters(ksize, strides, paddings, dilations) are two elements. -These two elements represent height and width, respectively. +the width of the feature. +Filters(Input) is MCHW format. Where M is the number of output image channels, C is +the number of input image channels, H is the height of the filter, and W +is the width of the filter. +Parameters(strides, paddings, dilations) are two elements. These two elements represent +height and width, respectively. The input(X) size and output(Out) size may be different. Example: Input: - Input shape: (N, C_in, H_in, W_in) - Filter shape: (C_out, C_in, H_f, W_f) + Input shape: $(N, C_{in}, H_{in}, W_{in})$ + Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ Output: - Output shape: (N, C_out, H_out, W_out) - where - H_out = (H_in + 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)) / strides[0] + 1; - W_out = (W_in + 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)) / strides[1] + 1; + Output shape: $(N, C_{out}, H_{out}, W_{out})$ + Where +$$ + H_{out}= \frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\ + W_{out}= \frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1 +$$ )DOC"); } @@ -165,7 +171,7 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, .SetDefault({0, 0, 0}); AddAttr( "groups", - "(int default:1), the group size of convolution operator. " + "(int default:1), the groups number of the convolution operator. " "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " "when group=2, the first half of the filters is only connected to the " "first half of the input channels, while the second half of the filters " @@ -174,32 +180,37 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, AddAttr>("dilations", "(vector default:{1, 1, 1}), the " "dilations(d_dilation, h_dilation, w_dilation) of " - "convolution operator. Currently, conv3d doesn't " - "support dilation.") + "convolution operator.") .SetDefault({1, 1, 1}); AddComment(R"DOC( Convolution3D Operator. The convolution operation calculates the output based on the input, filter -and strides, paddings, groups parameters. The size of each dimension of the +and strides, paddings, dilations, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. -Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch +Input(Input) and output(Output) are in NCDHW format, where N is batch size, C is the number of channels,D is the depth of the feature, H is the height of -the feature, and W is the width of the feature. Parameters(ksize, strides, paddings) -are three elements. These three elements represent depth, height and width, respectively. +the feature, and W is the width of the feature. +Filters(Input) is MCDHW format, where M is the number of output image channels, +C is the number of input image channels, D is the depth of the filter, +H is the height of the filter, and W is the width of the filter. +Parameters(strides, paddings, dilations) are three elements. These three elements +represent depth, height and width, respectively. The input(X) size and output(Out) size may be different. Example: Input: - Input shape: (N, C_in, D_in, H_in, W_in) - Filter shape: (C_out, C_in, D_f, H_f, W_f) + Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$ + Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$ Output: - Output shape: (N, C_out, D_out, H_out, W_out) - where - D_out = (D_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; - H_out = (H_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; - W_out = (W_in - filter_size[2] + 2 * paddings[2]) / strides[2] + 1; + Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$ + Where + $$ + D_{out}= \frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\ + H_{out}= \frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\ + W_{out}= \frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1 + $$ )DOC"); } diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 3e55ef036a..678b192dea 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -39,7 +39,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { "ConvTransposeOp input dimension and strides dimension should " "be consistent."); PADDLE_ENFORCE_EQ(paddings.size(), strides.size(), - "ConvTransposeOp paddings dimension and Conv strides " + "ConvTransposeOp paddings dimension and strides " "dimension should be the same."); PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], "In ConvTransposeOp, The input channel should be the same " @@ -62,24 +62,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( "The format of input tensor is NCHW. Where N is batch size, C is the " "number of input channels, H is the height of the feature, and " "W is the width of the feature."); - AddInput("Filter", - "(Tensor) The filter tensor of convolution transpose operator. " - "The format of the filter tensor is CMHW, where C is the number of " - "output image channels, M is the number of input image channels, " - "H is the height of the filter, and W is the width of the filter. " - "We enforce groups number == 1 and padding == 0 in " - "the convolution transpose scenario."); + AddInput( + "Filter", + "(Tensor) The filter tensor of convolution transpose operator. " + "The format of the filter tensor is MCHW, where M is the number of " + "input feature channels, C is the number of " + "output feature channels," + "H is the height of the filter, and W is the width of the filter. " + "We enforce groups number == 1 in the convolution transpose scenario."); AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. " "The format of output tensor is also NCHW."); AddAttr>( "strides", - "(vector defalut:{1, 1}), the strides(h_stride, w_stride) of " + "(vector default:{1, 1}), the strides(h_stride, w_stride) of " "convolution transpose operator.") .SetDefault({1, 1}); AddAttr>( "paddings", - "(vector defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution " + "(vector default:{0, 0}), the paddings(h_pad, w_pad) of convolution " "transpose operator.") .SetDefault({0, 0}); AddComment(R"DOC( @@ -88,21 +89,26 @@ Convolution2D Transpose Operator. The convolution transpose operation calculates the output based on the input, filter and strides, paddings, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. - -Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch -size, C is the number of channels, H is the height of the feature, and -W is the width of the feature. Parameters(ksize, strides, paddings) are two elements. -These two elements represent height and width, respectively. +Input(Input) and output(Output) are in NCHW format. Where N is batchsize, C is the +number of channels, H is the height of the feature, and W is the width of the feature. +Filter(Input) is in MCHW format. Where M is the number of input feature channels, +C is the number of output feature channels, H is the height of the filter, +and W is the width of the filter. +Parameters(strides, paddings) are two elements. These two elements represent height +and width, respectively. The input(X) size and output(Out) size may be different. + Example: Input: - Input shape: (N, C_in, H_in, W_in) - Filter shape: (C_in, C_out, H_f, W_f) + Input shape: $(N, C_{in}, H_{in}, W_{in})$ + Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ Output: - Output shape: (N, C_out, H_out, W_out) - where - H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; - W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; + Output shape: $(N, C_{out}, H_{out}, W_{out})$ + Where + $$ + H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + H_f \\ + W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + W_f + $$ )DOC"); } @@ -117,8 +123,9 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( "W is the width of the feature."); AddInput("Filter", "(Tensor) The filter tensor of convolution transpose operator." - "The format of the filter tensor is CMDHW, where C is the number of " - "output image channels, M is the number of input image channels, D " + "The format of the filter tensor is MCDHW, where M is the number of " + "input feature channels, C is the number of " + "output feature channels, D " "is the depth of the filter, H is the height of the filter, and " "W is the width of the filter." "We enforce groups number == 1 and padding == 0 in " @@ -130,12 +137,12 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); AddAttr>("strides", - "(vector defalut:{1, 1, 1}), the " + "(vector default:{1, 1, 1}), the " "strides{d_stride, h_stride, w_stride} of " "convolution transpose operator.") .SetDefault({1, 1, 1}); AddAttr>("paddings", - "(vector defalut:{0, 0, 0}), paddings(d_pad, " + "(vector default:{0, 0, 0}), paddings(d_pad, " "h_pad, w_pad) of convolution transpose operator.") .SetDefault({0, 0, 0}); AddComment(R"DOC( @@ -144,23 +151,28 @@ Convolution3D Transpose Operator. The convolution transpose operation calculates the output based on the input, filter and strides, paddings, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. - -Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch -size, C is the number of channels, D is the depth of the feature, -H is the height of the feature, and W is the width of the feature. -Parameters(ksize, strides, paddings) are three elements. -These three elements represent depth, height and width, respectively. +Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the +number of channels, D is the depth of the feature, H is the height of the feature, +and W is the width of the feature. +Filter(Input) is in MCDHW format. Where M is the number of input feature channels, +C is the number of output feature channels, D is the depth of the filter,H is the +height of the filter, and W is the width of the filter. +Parameters(strides, paddings) are three elements. These three elements represent +depth, height and width, respectively. The input(X) size and output(Out) size may be different. -Example: + +Example: Input: - Input shape: (N, C_in, D_in, H_in, W_in) - Filter shape: (C_in, C_out, D_f, H_f, W_f) + Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$ + Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$ Output: - Output shape: (N, C_out, D_out, H_out, W_out) - where - D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; - H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; - W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2]; + Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$ + Where + $$ + D_{out} = (D_{in} - 1) * strides[0] - 2 * paddings[0] + D_f \\ + H_{out} = (H_{in} - 1) * strides[1] - 2 * paddings[1] + H_f \\ + W_{out} = (W_{in} - 1) * strides[2] - 2 * paddings[2] + W_f + $$ )DOC"); } diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index 0fc0735788..1cacb770e6 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -63,7 +63,6 @@ class GemmConvTransposeKernel : public framework::OpKernel { std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); - // TODO(Zhuoyuan): Paddings can be added in future. // groups will alway be disabled in conv2dtranspose. const int batch_size = static_cast(input->dims()[0]); diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index d8c58618cf..e26ffd86e5 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -105,7 +105,7 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0,0}), paddings(height, width) of pooling " + "(vector, default {0,0}), paddings(height, width) of pooling " "operator." "If global_pooling = true, paddings and ksize will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -122,15 +122,15 @@ Parameters(ksize, strides, paddings) are two elements. These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. -Example: +Example: Input: X shape: $(N, C, H_{in}, W_{in})$ Output: Out shape: $(N, C, H_{out}, W_{out})$ - where + Where $$ - H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ - W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ + W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 $$ )DOC"); @@ -177,7 +177,7 @@ Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0,0,0}), paddings(depth, height, " + "(vector, default {0,0,0}), paddings(depth, height, " "width) of pooling operator. " "If global_pooling = true, ksize and paddings will be ignored.") .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -199,12 +199,12 @@ Example: X shape: $(N, C, D_{in}, H_{in}, W_{in})$ Output: Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ - where - $$ - D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ - H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\ - W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1 - $$ + Where + $$ + D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ + H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\ + W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 + $$ )DOC"); } diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 4958fa6454..b9c42a6912 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -142,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut:{0, 0}), paddings(height, width) of pooling " + "(vector, default:{0, 0}), paddings(height, width) of pooling " "operator. " "If global_pooling = true, paddings and will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -166,10 +166,10 @@ Example: Output: Out shape: $(N, C, H_{out}, W_{out})$ Mask shape: $(N, C, H_{out}, W_{out})$ - where + Where $$ - H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ - W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ + W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 $$ )DOC"); @@ -220,7 +220,7 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0,0,0}), paddings(depth, " + "(vector, default {0,0,0}), paddings(depth, " "height, width) of pooling operator. " "If global_pooling = true, paddings and ksize will be ignored.") .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -244,11 +244,11 @@ Example: Output: Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$ - where + Where $$ - D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ - H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\ - W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1 + D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ + H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\ + W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 $$ )DOC"); -- GitLab