/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include #include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #ifdef __NVCC__ #include "paddle/fluid/operators/reduce_ops/cub_reduce.h" #endif namespace paddle { namespace operators { template struct DivideFunctor { HOSTDEVICE explicit inline DivideFunctor(int n) : n_inv((T)(1.0 / n)) {} HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; } private: T n_inv; }; using Tensor = framework::Tensor; class PoolOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override; framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, const Tensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; class PoolOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override; framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, const Tensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override; }; class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override; }; template inline void UpdatePadding(std::vector* paddings, const bool global_pooling, const bool adaptive, const std::string padding_algorithm, const framework::DDim data_dims, const std::vector& strides, const std::vector& ksize) { // set padding size == data_dims.size() * 2 auto data_shape = framework::vectorize(data_dims); if (static_cast(paddings->size()) == data_dims.size()) { for (int i = 0; i < data_dims.size(); ++i) { T copy_pad = *(paddings->begin() + 2 * i); paddings->insert(paddings->begin() + 2 * i + 1, copy_pad); } } else { PADDLE_ENFORCE_EQ(data_dims.size() * 2, paddings->size(), platform::errors::InvalidArgument( "Paddings size %d should be the same or twice as the " "pooling size %d.", paddings->size(), data_dims.size() * 2)); } // when padding_algorithm is "VALID" or "SAME" if (padding_algorithm == "SAME") { for (int i = 0; i < data_dims.size(); ++i) { T out_size = (data_dims[i] + strides[i] - 1) / strides[i]; T pad_sum = std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i], static_cast(0)); T pad_0 = pad_sum / 2; T pad_1 = pad_sum - pad_0; *(paddings->begin() + i * 2) = pad_0; *(paddings->begin() + i * 2 + 1) = pad_1; } } else if (padding_algorithm == "VALID") { for (auto it = paddings->begin(); it != paddings->end(); it++) { *it = 0; } } // if global_pooling == true or adaptive == true, padding will be ignore if (global_pooling || adaptive) { for (auto it = paddings->begin(); it != paddings->end(); it++) { *it = 0; } } } template inline void UpdateKsize(std::vector* ksize, const framework::DDim data_dims) { ksize->resize(static_cast(data_dims.size())); for (size_t i = 0; i < ksize->size(); ++i) { *(ksize->begin() + i) = static_cast(data_dims[i]); } } inline int getReduceNum(const framework::Tensor& input, const framework::Tensor* output, const std::string data_format, std::vector* reduce_dim) { // data_format only can be NCHW bool channel_last = (data_format == "NHWC"); if (channel_last) { return 0; } int reduce_num = 0; const int output_height = output->dims()[2]; const int output_width = output->dims()[3]; if ((output_height == 1) && (output_width == 1)) { reduce_dim->push_back(2); reduce_dim->push_back(3); reduce_num = input.dims()[2] * input.dims()[3]; } return reduce_num; } template class PoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); Tensor* out = context.Output("Out"); std::string pooling_type = context.Attr("pooling_type"); std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); std::string data_format = context.Attr("data_format"); bool exclusive = context.Attr("exclusive"); bool adaptive = context.Attr("adaptive"); bool global_pooling = context.Attr("global_pooling"); std::string padding_algorithm = context.Attr("padding_algorithm"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); // update paddings auto in_x_dims = in_x->dims(); framework::DDim data_dims; if (channel_last) { data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); } else { data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); } UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, data_dims, strides, ksize); if (data_dims.size() * 2 == static_cast(paddings.size())) { for (int i = 0; i < data_dims.size(); ++i) { paddings.erase(paddings.begin() + i + 1); } } if (global_pooling) { UpdateKsize(&ksize, data_dims); } auto& dev_ctx = context.template device_context(); switch (ksize.size()) { case 2: { if (pooling_type == "max") { paddle::operators::math::Pool2dFunctor< DeviceContext, paddle::operators::math::MaxPool, T> pool2d_forward; paddle::operators::math::MaxPool pool_process; pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, true, false, out, pool_process); } else if (pooling_type == "avg") { std::vector reduce_dim; int reduce_num = getReduceNum(*in_x, out, data_format, &reduce_dim); if (reduce_num > 0 && adaptive) { // for adaptive_avg_pool2d && output_size == 1 #ifdef __HIPCC__ auto stream = dev_ctx.stream(); TensorReduce>( *in_x, out, reduce_dim, static_cast(0), hipcub::Sum(), DivideFunctor(reduce_num), stream); #elif defined(__NVCC__) auto stream = dev_ctx.stream(); TensorReduce>( *in_x, out, reduce_dim, static_cast(0), cub::Sum(), DivideFunctor(reduce_num), stream); #else // for cpu paddle::operators::math::Pool2dFunctor< DeviceContext, paddle::operators::math::AvgPool, T> pool2d_forward; paddle::operators::math::AvgPool pool_process; pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, exclusive, adaptive, out, pool_process); #endif } else { // avgpool_2d or adaptive_avg_pool2d && output_size != 1 paddle::operators::math::Pool2dFunctor< DeviceContext, paddle::operators::math::AvgPool, T> pool2d_forward; paddle::operators::math::AvgPool pool_process; pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, exclusive, adaptive, out, pool_process); } } } break; case 3: { if (pooling_type == "max") { paddle::operators::math::Pool3dFunctor< DeviceContext, paddle::operators::math::MaxPool, T> pool3d_forward; paddle::operators::math::MaxPool pool_process; pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, true, false, out, pool_process); } else if (pooling_type == "avg") { paddle::operators::math::Pool3dFunctor< DeviceContext, paddle::operators::math::AvgPool, T> pool3d_forward; paddle::operators::math::AvgPool pool_process; pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, exclusive, adaptive, out, pool_process); } } break; default: { PADDLE_THROW(platform::errors::InvalidArgument( "Pool op only supports 2D and 3D input.")); } } } }; template class PoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); const Tensor* out = context.Input("Out"); const Tensor* out_grad = context.Input(framework::GradVarName("Out")); Tensor* in_x_grad = context.Output(framework::GradVarName("X")); std::string pooling_type = context.Attr("pooling_type"); std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); bool exclusive = context.Attr("exclusive"); bool adaptive = context.Attr("adaptive"); std::string data_format = context.Attr("data_format"); bool global_pooling = context.Attr("global_pooling"); std::string padding_algorithm = context.Attr("padding_algorithm"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); // update paddings auto in_x_dims = in_x->dims(); framework::DDim data_dims; if (channel_last) { data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); } else { data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); } UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, data_dims, strides, ksize); if (data_dims.size() * 2 == static_cast(paddings.size())) { for (int i = 0; i < data_dims.size(); ++i) { paddings.erase(paddings.begin() + i + 1); } } if (global_pooling) { UpdateKsize(&ksize, data_dims); } auto& dev_ctx = context.template device_context(); if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); paddle::operators::math::SetConstant set_constant; set_constant(dev_ctx, in_x_grad, static_cast(0.0)); switch (ksize.size()) { case 2: { if (pooling_type == "max") { paddle::operators::math::MaxPool2dGradFunctor pool2d_backward; pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, paddings, data_format, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool2dGradFunctor< DeviceContext, paddle::operators::math::AvgPoolGrad, T> pool2d_backward; paddle::operators::math::AvgPoolGrad pool_process; pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, paddings, data_format, exclusive, adaptive, in_x_grad, pool_process); } } break; case 3: { if (pooling_type == "max") { paddle::operators::math::MaxPool3dGradFunctor pool3d_backward; pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, paddings, data_format, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool3dGradFunctor< DeviceContext, paddle::operators::math::AvgPoolGrad, T> pool3d_backward; paddle::operators::math::AvgPoolGrad pool_process; pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, paddings, data_format, exclusive, adaptive, in_x_grad, pool_process); } } break; default: { PADDLE_THROW(platform::errors::InvalidArgument( "Pool op only supports 2D and 3D input.")); } } } } }; } // namespace operators } // namespace paddle