diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 04a4bc91fe43a900a3b2c194787297f215482a9f..b7bed95b1d33583682b997def63bb38243d1794d 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -28,6 +28,270 @@ using mkldnn::reorder; using mkldnn::stream; using platform::to_void_cast; +template +class PoolingMKLDNNHandler + : public platform::MKLDNNHandlerT { + public: + PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, + const platform::MKLDNNDeviceContext& dev_ctx, + platform::Place cpu_place, const Tensor* input, + Tensor* output, const std::string& unique_name) + : platform::MKLDNNHandlerT( + dev_ctx, dev_ctx.GetEngine(), cpu_place, + platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), + framework::ToMKLDNNDataType(input->type()), + unique_name)) { + if (!this->isCached()) { + PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument( + "Wrong layout set for Input tensor.")); + PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument( + "Wrong format set for Input tensor.")); + + const std::string pooling_type = ctx.Attr("pooling_type"); + + std::vector ksize_temp = ctx.Attr>("ksize"); + std::vector ksize(begin(ksize_temp), end(ksize_temp)); + + std::vector strides_temp = ctx.Attr>("strides"); + std::vector strides(begin(strides_temp), end(strides_temp)); + + std::vector paddings_temp = ctx.Attr>("paddings"); + std::vector paddings(begin(paddings_temp), end(paddings_temp)); + + const bool global_pooling = ctx.Attr("global_pooling"); + const std::string padding_algorithm = + ctx.Attr("padding_algorithm"); + + // Only 2D pooling is supported now + PADDLE_ENFORCE_EQ( + ksize.size(), 2, + platform::errors::InvalidArgument( + "The ksize must be 2D, i.e. 2D pooling, but received %dD.", + ksize.size())); + PADDLE_ENFORCE_EQ( + pooling_type == "max" || pooling_type == "avg", true, + platform::errors::InvalidArgument( + "The pooling_type must be 'max' or 'avg', but received %s.", + pooling_type)); + PADDLE_ENFORCE_EQ( + input->dims().size(), 4, + platform::errors::InvalidArgument( + "Input dim must be with 4, i.e. NCHW, but received %d.", + input->dims().size())); + + const auto input_dims = input->dims(); + framework::DDim data_dims = + framework::slice_ddim(input_dims, 2, input_dims.size()); + + if (global_pooling) { + operators::UpdateKsize(&ksize, data_dims); + } + + operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, + data_dims, strides, ksize); + + const auto src_tz = paddle::framework::vectorize(input->dims()); + const auto dst_tz = paddle::framework::vectorize(output->dims()); + + const auto is_test = ctx.Attr("is_test"); + + const auto dt = framework::ToMKLDNNDataType(input->type()); + const auto fmt = input->format(); + + const auto exclude_padding = ctx.Attr("exclusive"); + + const auto src_md = mkldnn::memory::desc(src_tz, dt, fmt); + /* create memory descriptor for pooling without specified format + * ('any') which lets a primitive (pooling in this case) choose + * the memory format preferred for best performance + */ + + const auto dst_md = + platform::MKLDNNMemDesc(dst_tz, dt, MKLDNNMemoryFormat::any); + + auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); + + const bool ceil_mode = ctx.Attr("ceil_mode"); + + if (ceil_mode) { + CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides, + mkldnn_paddings[1]); + } + + ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides); + + this->AcquireForwardPrimitiveDescriptor( + is_test ? mkldnn::prop_kind::forward_inference + : mkldnn::prop_kind::forward_training, + pooling_type == "max" + ? mkldnn::algorithm::pooling_max + : (exclude_padding + ? mkldnn::algorithm::pooling_avg_exclude_padding + : mkldnn::algorithm::pooling_avg_include_padding), + src_md, dst_md, strides, ksize, mkldnn_paddings[0], + mkldnn_paddings[1]); + } + } + + PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, + const platform::MKLDNNDeviceContext& dev_ctx, + platform::Place cpu_place, const Tensor* in_x, + const Tensor* out_grad, Tensor* in_x_grad, + const std::string& unique_name) + : platform::MKLDNNHandlerT( + dev_ctx, dev_ctx.GetEngine(), cpu_place, + platform::CreateKey(dev_ctx, framework::vectorize(in_x->dims()), + framework::ToMKLDNNDataType(in_x->type()), + unique_name)) { + if (!this->isBwdCached()) { + PADDLE_ENFORCE_EQ(in_x->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument( + "Wrong layout set for Input tensor")); + PADDLE_ENFORCE_NE(in_x->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument( + "Wrong format set for Input tensor")); + + PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument( + "Wrong layout set for Input output_grad tensor")); + PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument( + "Wrong format set for Input output_grad tensor")); + + PADDLE_ENFORCE_EQ( + ctx.Attr("is_test"), false, + platform::errors::InvalidArgument( + "is_test attribute should be set to False in training phase.")); + + std::string pooling_type = ctx.Attr("pooling_type"); + + std::vector ksize_temp = ctx.Attr>("ksize"); + std::vector ksize(begin(ksize_temp), end(ksize_temp)); + + std::vector strides_temp = ctx.Attr>("strides"); + std::vector strides(begin(strides_temp), end(strides_temp)); + + std::vector paddings_temp = ctx.Attr>("paddings"); + std::vector paddings(begin(paddings_temp), end(paddings_temp)); + + bool global_pooling = ctx.Attr("global_pooling"); + std::string padding_algorithm = + ctx.Attr("padding_algorithm"); + + auto in_x_dims = in_x->dims(); + framework::DDim data_dims = + framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + + if (global_pooling) { + operators::UpdateKsize(&ksize, data_dims); + } + + operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, + data_dims, strides, ksize); + + auto src_tz = paddle::framework::vectorize(in_x->dims()); + auto diff_src_tz = + paddle::framework::vectorize(in_x_grad->dims()); + auto diff_dst_tz = + paddle::framework::vectorize(out_grad->dims()); + + auto diff_dst_md = mkldnn::memory::desc( + diff_dst_tz, platform::MKLDNNGetDataType(), out_grad->format()); + auto diff_src_md = + mkldnn::memory::desc(diff_src_tz, platform::MKLDNNGetDataType(), + MKLDNNMemoryFormat::any); + + auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); + const bool ceil_mode = ctx.Attr("ceil_mode"); + + if (ceil_mode) { + CorrectOutputSize(src_tz, diff_dst_tz, ksize, paddings, strides, + mkldnn_paddings[1]); + } + ComputeAdaptivePoolParameters(ctx, diff_src_tz, &ksize, &strides); + + const auto exclude_padding = ctx.Attr("exclusive"); + this->AcquireBackwardPrimitiveDescriptor( + pooling_type == "max" + ? mkldnn::algorithm::pooling_max + : (exclude_padding + ? mkldnn::algorithm::pooling_avg_exclude_padding + : mkldnn::algorithm::pooling_avg_include_padding), + diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0], + mkldnn_paddings[1]); + } + } + + std::shared_ptr AcquireWorkspaceMemory(void) { + mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc(); + // Pooling PD has to be passed to Grad op that + // may be executed by diffrent thread, hence + // for that one we use key that does not contain TID + auto local_key = this->key_common_ + "@workspace"; + auto mem_p = std::static_pointer_cast( + this->dev_ctx_.GetBlob(local_key)); + if (mem_p == nullptr) { + static std::mutex acquire_barrier; + std::lock_guard block_threads_until_finish_this_job( + acquire_barrier); + mem_p = std::static_pointer_cast( + this->dev_ctx_.GetBlob(local_key)); + if (mem_p == nullptr) { + mem_p = std::make_shared(workspace_md, this->engine_); + this->dev_ctx_.SetBlob(local_key, mem_p); + } + } + return mem_p; + } + + static void ComputeAdaptivePoolParameters( + const paddle::framework::ExecutionContext& ctx, + const std::vector& src_tz, std::vector* ksize, + std::vector* strides) { + if (ctx.Attr("adaptive")) { + // https://github.com/oneapi-src/oneDNN/tree/bkocot/adaptive-pooling/rfcs/20200818-adaptive-pooling + auto IH = static_cast(src_tz[src_tz.size() - 2]); + auto IW = static_cast(src_tz[src_tz.size() - 1]); + auto OH = static_cast(ksize->at(0)); + auto OW = static_cast(ksize->at(1)); + + strides->at(0) = + static_cast(floor((IH * 2.0) / OH) - floor(IH / OH)); + strides->at(1) = + static_cast(floor((IW * 2.0) / OW) - floor(IW / OW)); + ksize->at(0) = + static_cast(ceil((IH * 2.0) / OH) - floor(IH / OH)); + ksize->at(1) = + static_cast(ceil((IW * 2.0) / OW) - floor(IW / OW)); + } + } + + private: + static inline int ComputeCeiledOutput(int input_size, int kernel_size, + int padding, int stride) { + return (input_size - kernel_size + 2 * padding) / stride + 1; + } + + static inline void CorrectOutputSize( + const std::vector& src_tz, const std::vector& dst_tz, + const std::vector& kernel_size, + const std::vector& paddings, const std::vector& strides, + std::vector& right_bot_padding) { // NOLINT + for (size_t i = 0; i < right_bot_padding.size(); i++) { + int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i], + paddings[i], strides[i]); + if (desired_size != dst_tz[i + 2]) { + right_bot_padding[i] += strides[i] - 1; + } + } + } +}; + template class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -37,14 +301,12 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { "Operator DNNL Pool must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); const Tensor* input = ctx.Input("X"); Tensor* output = ctx.Output("Out"); - platform::PoolingMKLDNNHandler handler(ctx, dev_ctx, mkldnn_engine, - ctx.GetPlace(), input, output, - ctx.OutputName("Out")); + PoolingMKLDNNHandler handler(ctx, dev_ctx, ctx.GetPlace(), input, output, + ctx.OutputName("Out")); auto src_memory = handler.AcquireSrcMemory(input); auto dst_memory = handler.AcquireDstMemory(output); @@ -82,72 +344,11 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { const Tensor* out_grad = ctx.Input(framework::GradVarName("Out")); Tensor* in_x_grad = ctx.Output(framework::GradVarName("X")); - PADDLE_ENFORCE_EQ( - in_x->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument("Wrong layout set for Input tensor")); - PADDLE_ENFORCE_NE( - in_x->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument("Wrong format set for Input tensor")); - - PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for Input output_grad tensor")); - PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument( - "Wrong format set for Input output_grad tensor")); - - PADDLE_ENFORCE_EQ( - ctx.Attr("is_test"), false, - platform::errors::InvalidArgument( - "is_test attribute should be set to False in training phase.")); - - std::string pooling_type = ctx.Attr("pooling_type"); - - std::vector ksize_temp = ctx.Attr>("ksize"); - std::vector ksize(begin(ksize_temp), end(ksize_temp)); - - std::vector strides_temp = ctx.Attr>("strides"); - std::vector strides(begin(strides_temp), end(strides_temp)); - - std::vector paddings_temp = ctx.Attr>("paddings"); - std::vector paddings(begin(paddings_temp), end(paddings_temp)); - - bool global_pooling = ctx.Attr("global_pooling"); - std::string padding_algorithm = ctx.Attr("padding_algorithm"); - - auto in_x_dims = in_x->dims(); - framework::DDim data_dims = - framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); - - if (global_pooling) { - UpdateKsize(&ksize, data_dims); - } - - UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, data_dims, - strides, ksize); - - platform::PoolingMKLDNNHandler::ComputeAdaptivePoolParameters( - ctx, paddle::framework::vectorize(in_x->dims()), ksize, strides); - auto& dev_ctx = ctx.template device_context(); - std::vector pipeline; - - auto diff_src_tz = paddle::framework::vectorize(in_x_grad->dims()); - auto diff_dst_tz = paddle::framework::vectorize(out_grad->dims()); - - // Get an unique name from "argument" name of "Out" variable - // This name will be used as key when referring info from device context - const std::string key = platform::CreateKey( - dev_ctx, diff_src_tz, pooling_type, ksize, strides, paddings, - memory::data_type::f32, in_x->format(), ctx.InputName("Out")); - - platform::PoolingMKLDNNHandler handler( - diff_dst_tz, diff_src_tz, ksize, strides, paddings, pooling_type, - ctx.Attr("ceil_mode"), in_x->format(), out_grad->format(), - paddle::framework::ToMKLDNNDataType(out_grad->type()), dev_ctx, - ctx.GetPlace(), ctx.InputName("Out"), ctx.Attr("exclusive")); + PoolingMKLDNNHandler handler(ctx, dev_ctx, ctx.GetPlace(), in_x, + out_grad, in_x_grad, ctx.InputName("Out")); auto diff_dst_memory = handler.AcquireDiffDstMemory(out_grad); auto diff_src_memory = handler.AcquireDiffSrcMemory(in_x_grad); @@ -155,7 +356,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto pool_bwd_p = handler.AcquireBackwardPrimitive(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - if (pooling_type == "max") { + if (ctx.Attr("pooling_type") == "max") { // Max - pooling needs Workspace auto workspace_memory = handler.AcquireWorkspaceMemory(); pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 55651dcecf6c290bb19def834611895d30237687..2d4ef64cc896a09a59bbbd9e529c19df6cd9a312 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -144,6 +144,35 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const { ctx->ShareLoD("X", "Out"); } +bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) { + if (ctx.Attr("adaptive") == false) return true; + // (jczaja): oneDNN is supporting only unchangable in size pool window + auto src_tz = paddle::framework::vectorize(ctx.Input("X")->dims()); + std::vector ksize = ctx.Attr>("ksize"); + // Fast but not exhustive check + if ((src_tz[src_tz.size() - 1] % ksize[1] == 0) && + (src_tz[src_tz.size() - 2] % ksize[0] == 0)) + return true; + + // Exhustive check + auto IH = static_cast(src_tz[src_tz.size() - 2]); + auto IW = static_cast(src_tz[src_tz.size() - 1]); + auto OH = static_cast(ksize[0]); + auto OW = static_cast(ksize[1]); + + auto SH = static_cast(floor((IH * 2.0) / OH) - floor(IH / OH)); + auto SW = static_cast(floor((IW * 2.0) / OW) - floor(IW / OW)); + auto KH = static_cast(ceil((IH * 2.0) / OH) - floor(IH / OH)); + auto KW = static_cast(ceil((IW * 2.0) / OW) - floor(IW / OW)); + + auto PH = (SH * (static_cast(OH) - 1) + KH - static_cast(IH)); + auto PW = (SW * (static_cast(OW) - 1) + KW - static_cast(IW)); + // If there is additional padding needed then + // this is situation that oneDNN cannot comply with + // paddlepaddle reference implementation + return (PH == 0) && (PW == 0); +} + framework::OpKernelType PoolOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; @@ -158,7 +187,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( #endif #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && - this->CanMKLDNNBeUsed(ctx, data_type)) { + this->CanMKLDNNBeUsed(ctx, data_type) && CanMKLDNNSupportPool(ctx)) { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; } @@ -213,7 +242,8 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType( #endif #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && - this->CanMKLDNNBeUsed(ctx, input_data_type)) { + this->CanMKLDNNBeUsed(ctx, input_data_type) && + CanMKLDNNSupportPool(ctx)) { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 37aae14c83a4dbabc5a156b3381e2e9e5a4c9d6c..2cff67670f6953fd47144aac7a1e862db58f0473 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -120,6 +120,15 @@ class MKLDNNHandlerT { return (dev_ctx_.GetBlob(key_p) != nullptr); } + bool isBwdCached() { + const std::string key_pd = key_common_ + "@bwd_pd"; + bwd_pd_ = std::static_pointer_cast( + dev_ctx_.GetBlob(key_pd)); + + const std::string key_p = key_ + "@bwd_p"; + return (dev_ctx_.GetBlob(key_p) != nullptr); + } + // If your primitive descriptor requires attributes, pass them as a // first argument and paramters to descriptor constructor in the following // arguments. Otherwise, all arguments will be forwarded to descriptor @@ -735,210 +744,6 @@ class LRNMKLDNNHandler } }; -template -class PoolingMKLDNNHandler : public MKLDNNHandlerT { - public: - PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const MKLDNNDeviceContext& dev_ctx, - const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const Tensor* input, - Tensor* output, const std::string& unique_name) - : platform::MKLDNNHandlerT( - dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), - framework::ToMKLDNNDataType(input->type()), - unique_name)) { - if (!this->isCached()) { - PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for Input tensor.")); - PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument( - "Wrong format set for Input tensor.")); - - const std::string pooling_type = ctx.Attr("pooling_type"); - - std::vector ksize_temp = ctx.Attr>("ksize"); - std::vector ksize(begin(ksize_temp), end(ksize_temp)); - - std::vector strides_temp = ctx.Attr>("strides"); - std::vector strides(begin(strides_temp), end(strides_temp)); - - std::vector paddings_temp = ctx.Attr>("paddings"); - std::vector paddings(begin(paddings_temp), end(paddings_temp)); - - const bool global_pooling = ctx.Attr("global_pooling"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - - // Only 2D pooling is supported now - PADDLE_ENFORCE_EQ( - ksize.size(), 2, - platform::errors::InvalidArgument( - "The ksize must be 2D, i.e. 2D pooling, but received %dD.", - ksize.size())); - PADDLE_ENFORCE_EQ( - pooling_type == "max" || pooling_type == "avg", true, - platform::errors::InvalidArgument( - "The pooling_type must be 'max' or 'avg', but received %s.", - pooling_type)); - PADDLE_ENFORCE_EQ( - input->dims().size(), 4, - platform::errors::InvalidArgument( - "Input dim must be with 4, i.e. NCHW, but received %d.", - input->dims().size())); - - const auto input_dims = input->dims(); - framework::DDim data_dims = - framework::slice_ddim(input_dims, 2, input_dims.size()); - - if (global_pooling) { - operators::UpdateKsize(&ksize, data_dims); - } - - operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, - data_dims, strides, ksize); - - const auto src_tz = paddle::framework::vectorize(input->dims()); - const auto dst_tz = paddle::framework::vectorize(output->dims()); - - const auto is_test = ctx.Attr("is_test"); - - const auto dt = framework::ToMKLDNNDataType(input->type()); - const auto fmt = input->format(); - - const auto exclude_padding = ctx.Attr("exclusive"); - - const auto src_md = mkldnn::memory::desc(src_tz, dt, fmt); - /* create memory descriptor for pooling without specified format - * ('any') which lets a primitive (pooling in this case) choose - * the memory format preferred for best performance - */ - - const auto dst_md = - platform::MKLDNNMemDesc(dst_tz, dt, MKLDNNMemoryFormat::any); - - auto mkldnn_paddings = ToMkldnnPadding(paddings); - - const bool ceil_mode = ctx.Attr("ceil_mode"); - - if (ceil_mode) { - CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides, - mkldnn_paddings[1]); - } - - ComputeAdaptivePoolParameters(ctx, src_tz, ksize, strides); - - this->AcquireForwardPrimitiveDescriptor( - is_test ? mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training, - pooling_type == "max" - ? mkldnn::algorithm::pooling_max - : (exclude_padding - ? mkldnn::algorithm::pooling_avg_exclude_padding - : mkldnn::algorithm::pooling_avg_include_padding), - src_md, dst_md, strides, ksize, mkldnn_paddings[0], - mkldnn_paddings[1]); - } - } - - PoolingMKLDNNHandler( - const std::vector& diff_dst_dims, - const std::vector& diff_src_dims, - const std::vector& ksize, const std::vector& strides, - const std::vector& paddings, const std::string& pooling_type, - bool ceil_mode, const MKLDNNMemoryFormat fmt, - const MKLDNNMemoryFormat diff_dst_fmt, mkldnn::memory::data_type dt, - const platform::MKLDNNDeviceContext& dev_ctx, platform::Place cpu_place, - const std::string& unique_name, bool exclude_padding) - : platform::MKLDNNHandlerT( - dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, diff_src_dims, dt, unique_name)) { - auto diff_dst_md = mkldnn::memory::desc( - diff_dst_dims, platform::MKLDNNGetDataType(), diff_dst_fmt); - auto diff_src_md = - mkldnn::memory::desc(diff_src_dims, platform::MKLDNNGetDataType(), - MKLDNNMemoryFormat::any); - - auto mkldnn_paddings = ToMkldnnPadding(paddings); - - this->AcquireBackwardPrimitiveDescriptor( - pooling_type == "max" - ? mkldnn::algorithm::pooling_max - : (exclude_padding - ? mkldnn::algorithm::pooling_avg_exclude_padding - : mkldnn::algorithm::pooling_avg_include_padding), - diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0], - mkldnn_paddings[1]); - } - - std::shared_ptr AcquireWorkspaceMemory(void) { - mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc(); - // Pooling PD has to be passed to Grad op that - // may be executed by diffrent thread, hence - // for that one we use key that does not contain TID - auto local_key = this->key_common_ + "@workspace"; - auto mem_p = std::static_pointer_cast( - this->dev_ctx_.GetBlob(local_key)); - if (mem_p == nullptr) { - static std::mutex acquire_barrier; - std::lock_guard block_threads_until_finish_this_job( - acquire_barrier); - mem_p = std::static_pointer_cast( - this->dev_ctx_.GetBlob(local_key)); - if (mem_p == nullptr) { - mem_p = std::make_shared(workspace_md, this->engine_); - this->dev_ctx_.SetBlob(local_key, mem_p); - } - } - return mem_p; - } - - static void ComputeAdaptivePoolParameters( - const paddle::framework::ExecutionContext& ctx, - const std::vector& src_tz, std::vector& ksize, - std::vector& strides) { - if (ctx.Attr("adaptive")) { - // (jczaja): oneDNN is supporting only unchangable in size pool window - PADDLE_ENFORCE_EQ( - src_tz[src_tz.size() - 1] % ksize[1], 0, - platform::errors::Unimplemented( - "Input dim must be divisible by corressponding ksize dim.")); - PADDLE_ENFORCE_EQ( - src_tz[src_tz.size() - 2] % ksize[0], 0, - platform::errors::Unimplemented( - "Input dim must be divisible by corressponding ksize dim.")); - ksize[0] = src_tz[src_tz.size() - 2] / ksize[0]; - ksize[1] = src_tz[src_tz.size() - 1] / ksize[1]; - strides[0] = ksize[0]; - strides[1] = ksize[1]; - } - } - - private: - static inline int ComputeCeiledOutput(int input_size, int kernel_size, - int padding, int stride) { - return (input_size - kernel_size + 2 * padding) / stride + 1; - } - - static inline void CorrectOutputSize( - const std::vector& src_tz, const std::vector& dst_tz, - const std::vector& kernel_size, - const std::vector& paddings, const std::vector& strides, - std::vector& right_bot_padding) { // NOLINT - for (size_t i = 0; i < right_bot_padding.size(); i++) { - int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i], - paddings[i], strides[i]); - if (desired_size != dst_tz[i + 2]) { - right_bot_padding[i] += strides[i] - 1; - } - } - } -}; - template class TransposeMKLDNNHandler : public MKLDNNHandler { public: diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index 467bac67051dd4acd76c99238b562481e897b61a..7ecd0ee09985ec01108c5a1baf807cb4d8189602 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -92,6 +92,15 @@ class TestAvgPoolAdaptive2(TestAvgPoolAdaptive): self.shape = [2, 3, 6, 6] +class TestAvgPoolAdaptive3(TestAvgPoolAdaptive): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + + def init_shape(self): + self.shape = [1, 3, 16, 16] + + class TestAsymPad(TestPool2D_Op): def init_test_case(self): self.ksize = [3, 3]