diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 7dae8fe2f99f9ec1233d0a0f6180cc9f287fc150..ad941bde2be3bbbc6d910fff262ea4cb3878f8be 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -112,7 +112,9 @@ set(DEPS_OPS cond_op cross_entropy_op softmax_with_cross_entropy_op - sum_op) + sum_op + pool_op + pool_with_index_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc @@ -121,6 +123,8 @@ op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(sum_op DEPS net_op) +op_library(pool_op DEPS pooling) +op_library(pool_with_index_op DEPS pooling) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 2fd559e90a22d01cbaf89c0fbd0f011bfdf66596..1a2f623ce7917b1e60656743e699271eab9c7011 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,14 +1,16 @@ if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu pooling.cc pooling.cu DEPS cblas device_context operator) + nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context operator) nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) + nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) else() - cc_library(math_function SRCS math_function.cc im2col.cc pooling.cc DEPS cblas device_context operator) + cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) cc_library(softmax SRCS softmax.cc DEPS operator) cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) + cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(vol2col SRCS vol2col.cc DEPS device_context) endif() diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index ba3b5ed2075ceca284b49ecddb90ba5950b820c3..c6d9aae13322ebcc9ebbe394d9b9831bd67fe632 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -22,157 +22,181 @@ int OutputSizePool(int input_size, int filter_size, int padding, int stride) { return output_size; } -class PoolOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "X(Input) of Pooling should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Out(Output) of Pooling should not be null."); - - auto in_x_dims = ctx->GetInputDim("X"); - - std::string pooling_type = ctx->Attrs().Get("poolingType"); - std::vector ksize = ctx->Attrs().Get>("ksize"); - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - - PADDLE_ENFORCE(pooling_type == "max" || pooling_type == "avg", - "pooling_type should be 'max' or 'avg'"); - PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, - "Pooling intput should be 4-D or 5-D"); - - if (ctx->Attrs().Get("globalPooling")) { - ksize.resize(static_cast(in_x_dims.size()) - 2); - for (size_t i = 0; i < ksize.size(); ++i) - ksize[i] = static_cast(in_x_dims[i + 2]); - } - - PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, - "Input size and Pooling size should be consistent."); - PADDLE_ENFORCE(ksize.size() == 2 || ksize.size() == 3, - "Pooling size should be 2 elements. or 3 elements."); - PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), - "strides size and pooling size should be the same."); - PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), - "paddings size and pooling size should be the same."); - - std::vector output_shape({in_x_dims[0], in_x_dims[1]}); - for (size_t i = 0; i < ksize.size(); ++i) { - output_shape.push_back( - OutputSizePool(in_x_dims[i + 2], ksize[i], paddings[i], strides[i])); - } - ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); +void PoolOp::InferShape(framework::InferShapeContext *ctx) const { + PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Out(Output) of Pooling should not be null."); + + auto in_x_dims = ctx->GetInputDim("X"); + + std::string pooling_type = ctx->Attrs().Get("poolingType"); + std::vector ksize = ctx->Attrs().Get>("ksize"); + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + + PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, + "Pooling intput should be 4-D or 5-D tensor."); + + if (ctx->Attrs().Get("globalPooling")) { + ksize.resize(static_cast(in_x_dims.size()) - 2); + for (size_t i = 0; i < ksize.size(); ++i) + ksize[i] = static_cast(in_x_dims[i + 2]); } -}; - -class PoolOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "X(Input) of Pooling should not be null."); - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Input@Grad of Pooling should not be null."); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + + PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, + "Input size and pooling size should be consistent."); + PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), + "Strides size and pooling size should be the same."); + PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), + "Paddings size and pooling size should be the same."); + + std::vector output_shape({in_x_dims[0], in_x_dims[1]}); + for (size_t i = 0; i < ksize.size(); ++i) { + output_shape.push_back( + OutputSizePool(in_x_dims[i + 2], ksize[i], paddings[i], strides[i])); } -}; - -class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { - public: - Pool2dOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "X", - "The input tensor of pooling operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of feature."); - AddOutput("Out", - "The output tensor of pooling operator." - "The format of output tensor is also NCHW."); - - AddAttr("poolingType", - "PoolingType of pooling operator." - "Str constant equal to 'max' or 'avg'.") - .InEnum({"max", "avg"}); - AddAttr>( - "ksize", - "Pooling size(depth, height, width) of pooling operator." - "If globalPooling = true, ksize is ignored and need not be " - "specified."); // TODO(Add checker) - AddAttr( - "globalPooling", - "Whether to use the globalPooling." - "Bool constant equal to false or true." - "Default false." - "If globalPooling = true, ksize is ignored and need not be specified.") - .SetDefault(false); - AddAttr>("strides", - "Strides(height, width) of pooling operator." - "Default {1,1}") - .SetDefault({1, 1}); // TODO(Add checker) - AddAttr>("paddings", - "Paddings(height, width) of pooling operator." - "Default {0,0}.") - .SetDefault({0, 0}); // TODO(Add checker) - AddComment(R"DOC( + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); +} + +void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Input(X@GRAD) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); +} + +Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(Tensor) The input tensor of pooling operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of feature."); + AddOutput("Out", + "(Tensor) The output tensor of pooling operator." + "The format of output tensor is also NCHW." + "Where N is batch size, C is " + "the number of channels, H and W is the height and " + "width of feature."); + + AddAttr("poolingType", + "PoolingType of pooling operator." + "Str constant equal to 'max' or 'avg'.") + .InEnum({"max", "avg"}); + + AddAttr>( + "ksize", + "The pooling window size(height, width) of pooling operator." + "If globalPooling = true, ksize is ignored and need not be " + "specified."); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + AddAttr( + "globalPooling", + "Whether to use the globalPooling." + "Bool constant equal to false or true." + "Default false." + "If globalPooling = true, ksize is ignored and need not be specified.") + .SetDefault(false); + AddAttr>("strides", + "The strides(height, width) of pooling window." + "Default {1,1}.") + .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + AddAttr>("paddings", + "The zero padding(height, width) size on both sides" + "Default {0,0}.") + .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + + AddComment(R"DOC( The pooling2d operation calculates the output based on the input, poolingType and ksize, strides, paddings parameters. +Input(X) and output(Out) are in NCHW format. Where N is batch size, C is the +number of channels, H and W is the height and width of feature. +Parameters(ksize, strides, paddings) are two elements. +These two elements represent height and width, respectively. +The input(X) size and output(Out) size may be different. + +Example: + Input: + X shape: (N, C, H_in, W_in) + Output: + Out shape: (N, C, H_out, W_out) + Mask shape: (N, C, H_out, W_out) + where + H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; + W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; )DOC"); - } -}; - -class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { - public: - Pool3dOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", - "The input tensor of pooling operator. " - "The format of input tensor is NCDHW. Where N is batch size, C is " - "the " - "number of channels, D, H and W is the depth, height and width of " - "feature."); - AddOutput("Out", - "The output tensor of pooling operator." - "The format of output tensor is also NCDHW."); - - AddAttr("poolingType", - "PoolingType of pooling operator." - "str constant equal to 'max' or 'avg'.") - .InEnum({"max", "avg"}); - AddAttr>( - "ksize", - "Pooling size(depth, height, width) of pooling operator." - "If globalPooling = true, ksize is ignored and need not be " - "specified."); // TODO(Add checker) - AddAttr( - "globalPooling", - "Whether to use the globalPooling." - "Bool constant equal to false or true." - "Default false." - "If globalPooling = true, ksize is ignored and need not be specified.") - .SetDefault(false); - AddAttr>( - "strides", - "Strides(depth, height, width) of pooling operator." - "Default {1,1,1}.") - .SetDefault({1, 1, 1}); // TODO(Add checker) - AddAttr>( - "paddings", - "Paddings(depth, height, width) of pooling operator." - "Default {0,0,0}.") - .SetDefault({0, 0, 0}); // TODO(Add checker) - AddComment(R"DOC( +} + +Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(Tensor) The input tensor of pooling operator. " + "The format of input tensor is NCDHW. Where N is batch size, C is " + "the number of channels, D, H and W is the depth, height and width of " + "feature."); + AddOutput("Out", + "(Tensor) The output tensor of pooling operator." + "The format of output tensor is also NCDHW." + "Where N is batch size, C is " + "the number of channels, D, H and W is the depth, height and " + "width of feature."); + + AddAttr("poolingType", + "PoolingType of pooling operator." + "Str constant equal to 'max' or 'avg'.") + .InEnum({"max", "avg"}); + + AddAttr>( + "ksize", + "The pooling window size(depth, height, width) of pooling operator." + "If globalPooling = true, ksize is ignored and need not be " + "specified."); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + AddAttr( + "globalPooling", + "Whether to use the globalPooling." + "Bool constant equal to false or true." + "Default false." + "If globalPooling = true, ksize is ignored and need not be specified.") + .SetDefault(false); + AddAttr>("strides", + "Strides(depth, height, width) of pooling operator." + "Default {1,1,1}.") + .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + AddAttr>( + "paddings", + "Paddings(depth, height, width) of pooling operator." + "Default {0,0,0}.") + .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, + // TypedAttrChecker don't support vector type.) + + AddComment(R"DOC( The pooling3d operation calculates the output based on the input, poolingType and ksize, strides, paddings parameters. +Input(X) and output(Out) are in NCDHW format. Where N is batch +size, C is the number of channels, D, H and W is the depth, height and +width of feature. Parameters(ksize, strides, paddings) are three elements. +These three elements represent depth, height and width, respectively. +The input(X) size and output(Out) size may be different. + +Example: + Input: + X shape: (N, C, D_in, H_in, W_in) + Output: + Out shape: (N, C, D_out, H_out, W_out) + Mask shape: (N, C, D_out, H_out, W_out) + where + D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; + H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; + W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; )DOC"); - } -}; +} } // namespace operators } // namespace paddle diff --git a/paddle/operators/pool_op.h b/paddle/operators/pool_op.h index c2bc358def42959f2cc8f61cb00436fae1b7514b..e5016d573dde0a9c8a90cddf14f68706b69fade5 100644 --- a/paddle/operators/pool_op.h +++ b/paddle/operators/pool_op.h @@ -24,6 +24,34 @@ namespace operators { using Tensor = framework::Tensor; +class PoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + +class PoolOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + +class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Pool2dOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Pool3dOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + template class PoolKernel : public framework::OpKernel { public: diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 7b6afcfd1f7e30624cb6859228892677cba58856..005ee886934b193064cc739638398b3535db9274 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -43,7 +43,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { std::vector paddings = ctx->Attrs().Get>("paddings"); PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, - "Pooling intput should be 4-D or 5-D"); + "Pooling intput should be 4-D or 5-D tensor."); if (ctx->Attrs().Get("globalPooling")) { ksize.resize(static_cast(in_x_dims.size()) - 2); @@ -52,7 +52,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { } PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, - "Intput size and pooling size should be consistent."); + "Input size and pooling size should be consistent."); PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), "Strides size and pooling size should be the same."); PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), @@ -74,6 +74,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Mask"), "Input(Mask) must not be null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Input(X@GRAD) should not be null."); @@ -88,17 +89,17 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( "X", - "The input tensor of pooling operator. " + "(Tensor) The input tensor of pooling operator. " "The format of input tensor is NCHW. Where N is batch size, C is the " "number of channels, H and W is the height and width of image."); AddOutput("Out", - "The output tensor of pooling operator." + "(Tensor) The output tensor of pooling operator." "The format of output tensor is also NCHW." "Where N is batch size, C is " "the number of channels, H and W is the height and " "width of image."); AddOutput("Mask", - "The Mask tensor of pooling operator." + "(Tensor) The Mask tensor of pooling operator." "The format of output tensor is also NCHW." "Where N is batch size, C is the number of channels, H and W " "is the height and width of image." @@ -106,7 +107,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr>( "ksize", - "The pooling size(height, width) of pooling operator." + "The pooling window size(height, width) of pooling operator." "If globalPooling = true, ksize is ignored and need not be " "specified."); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) @@ -118,13 +119,14 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { "If globalPooling = true, ksize is ignored and need not be specified.") .SetDefault(false); AddAttr>("strides", - "Strides(height, width) of pooling operator." + "The strides(height, width) of pooling window." "Default {1,1}.") .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) - AddAttr>("paddings", - "Paddings(height, width) of pooling operator." - "Default {0,0}.") + AddAttr>( + "paddings", + "The zero padding(height, width) size on both sides" + "Default {0,0}.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) @@ -135,6 +137,17 @@ output(Out, Mask) are in NCHW format. Where N is batch size, C is the number of channels, H and W is the height and width of feature. Parameters(ksize, strides, paddings) are two elements. These two elements represent height and width, respectively. +The input(X) size and output(Out, Mask) size may be different. + +Example: + Input: + X shape: (N, C, H_in, W_in) + Output: + Out shape: (N, C, H_out, W_out) + Mask shape: (N, C, H_out, W_out) + where + H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; + W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; )DOC"); } }; @@ -146,18 +159,18 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( "X", - "The input tensor of pooling operator. " + "(Tensor) The input tensor of pooling operator. " "The format of input tensor is NCDHW. Where N is batch size, C is " "the number of channels, D, H and W is the depth, height and width of " "image."); AddOutput("Out", - "The output tensor of pooling operator." + "(Tensor) The output tensor of pooling operator." "The format of output tensor is also NCDHW." "Where N is batch size, C is " "the number of channels, D, H and W is the depth, height and " "width of image."); AddOutput("Mask", - "The Mask tensor of pooling operator." + "(Tensor) The Mask tensor of pooling operator." "The format of output tensor is also NCDHW." "Where N is batch size, C is the number of channels, D, H and W " "is the depth, height and width of image." @@ -165,7 +178,7 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr>( "ksize", - "The pooling size(depth, height, width) of pooling operator." + "The pooling window size(depth, height, width) of pooling operator." "If globalPooling = true, ksize is ignored and need not be " "specified."); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) @@ -196,6 +209,18 @@ Input(X) and output(Out, Mask) are in NCDHW format. Where N is batch size, C is the number of channels, D, H and W is the depth, height and width of feature. Parameters(ksize, strides, paddings) are three elements. These three elements represent depth, height and width, respectively. +The input(X) size and output(Out, Mask) size may be different. + +Example: + Input: + X shape: (N, C, D_in, H_in, W_in) + Output: + Out shape: (N, C, D_out, H_out, W_out) + Mask shape: (N, C, D_out, H_out, W_out) + where + D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; + H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; + W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; )DOC"); } };