diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 00cbe28d45dc4393ba1c141912aee7d1b7469a89..1032aadcbda4f1b05841e08e1abe7c737c3aeb9c 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -76,10 +76,10 @@ bool AnalysisPredictor::Init(
   }
 
   OptimizeInferenceProgram();
-  ctx_ = executor_->Prepare(*inference_program_, 0);
   if (config_._use_mkldnn) {
     executor_->EnableMKLDNN(*inference_program_);
   }
+  ctx_ = executor_->Prepare(*inference_program_, 0);
 
   VLOG(5) << "to create variables";
   PADDLE_ENFORCE(scope_.get());
diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc
index 5341187d1ce9400ac34750ab691608e76158ae0d..56cef91e29cc7da27384c27a7ec63e90cfadfc3b 100644
--- a/paddle/fluid/operators/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/pool_mkldnn_op.cc
@@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims,
          dims2str(paddings) + pooling_type + suffix;
 }
 
+static inline int ComputeCeiledOutput(int input_size, int kernel_size,
+                                      int padding, int stride) {
+  return (input_size - kernel_size + 2 * padding) / stride + 1;
+}
+
+static inline void CorrectOutputSize(
+    const std::vector<int>& src_tz, const std::vector<int>& dst_tz,
+    const std::vector<int>& kernel_size, const std::vector<int>& paddings,
+    const std::vector<int>& strides,
+    std::vector<int>& right_bot_padding) {  // NOLINT
+  for (size_t i = 0; i < right_bot_padding.size(); i++) {
+    int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i],
+                                           paddings[i], strides[i]);
+    if (desired_size != dst_tz[i + 2]) {
+      right_bot_padding[i] += strides[i];
+    }
+  }
+}
+
 template <typename T>
 class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
  public:
@@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     auto pool_p = std::static_pointer_cast<mkldnn::pooling_forward>(
         dev_ctx.GetBlob(key_pool_p));
     if (pool_p == nullptr) {
+      const std::vector<int>& padding_left_top(paddings);
+      std::vector<int> padding_right_bottom(paddings);
+      bool ceil_mode = ctx.Attr<bool>("ceil_mode");
+      if (ceil_mode) {
+        CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
+                          padding_right_bottom);
+      }
       auto src_md = platform::MKLDNNMemDesc(
           src_tz, platform::MKLDNNGetDataType<T>(), input_format);
@@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           mkldnn::memory::format::any);
 
       std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
-          CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize,
-                              pooling_type, mkldnn_engine);
+          CreatePrimitiveDesc(src_md, dst_md, strides, padding_left_top,
+                              padding_right_bottom, ksize, pooling_type,
+                              mkldnn_engine, ceil_mode);
 
       // save pool_pd into global device context to be referred in backward path
       dev_ctx.SetBlob(key_pool_pd, pool_pd);
@@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
  private:
   std::unique_ptr<mkldnn::pooling_forward::primitive_desc> CreatePrimitiveDesc(
       const mkldnn::memory::desc& src, const mkldnn::memory::desc& dst,
-      const std::vector<int>& stride, const std::vector<int>& padding,
-      const std::vector<int>& kernel, const std::string& pooling_type,
-      const mkldnn::engine& engine) const {
+      const std::vector<int>& stride, const std::vector<int>& padding_left_top,
+      const std::vector<int>& padding_right_bot, const std::vector<int>& kernel,
+      const std::string& pooling_type, const mkldnn::engine& engine,
+      bool ceil_mode) const {
     auto pool_desc = mkldnn::pooling_forward::desc(
         mkldnn::prop_kind::forward,
         pooling_type == "max" ? mkldnn::algorithm::pooling_max
                               : mkldnn::algorithm::pooling_avg,
-        src, dst, stride, kernel, padding, padding, mkldnn::padding_kind::zero);
+        src, dst, stride, kernel, padding_left_top, padding_right_bot,
+        mkldnn::padding_kind::zero);
 
     auto p_pool_pd =
         new mkldnn::pooling_forward::primitive_desc(pool_desc, engine);
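
Reviewer note on the ceil_mode change in pool_mkldnn_op.cc: MKL-DNN sizes
pooling outputs with floor-style division, while Paddle's ceil_mode sizes the
output tensor (dst_tz) with ceiling division. Despite its name,
ComputeCeiledOutput() uses integer (floor) division, so it reproduces the size
MKL-DNN will compute; whenever that disagrees with the ceil-sized dst_tz,
CorrectOutputSize() grows the right/bottom padding by one stride, which raises
the floor-mode output by exactly one. A minimal standalone sketch of the
arithmetic (not part of the patch; the helper names and shapes below are
assumed for illustration):

    // Sketch only: compares floor- and ceil-style pooling output sizes.
    #include <cmath>
    #include <cstdio>

    // Floor-style output size, with possibly asymmetric padding,
    // matching how MKL-DNN validates dst dimensions.
    static int FloorOutput(int in, int k, int pad_l, int pad_r, int s) {
      return (in + pad_l + pad_r - k) / s + 1;
    }

    // Ceil-style output size, matching how ceil_mode sizes the output.
    static int CeilOutput(int in, int k, int pad, int s) {
      return static_cast<int>(
                 std::ceil(static_cast<double>(in - k + 2 * pad) / s)) +
             1;
    }

    int main() {
      // Assumed example: input 6, kernel 3, padding 0, stride 2.
      int in = 6, k = 3, pad = 0, s = 2;
      std::printf("ceil:  %d\n", CeilOutput(in, k, pad, s));         // 3
      std::printf("floor: %d\n", FloorOutput(in, k, pad, pad, s));   // 2
      // One extra stride of right/bottom padding closes the gap,
      // mirroring what CorrectOutputSize() does per spatial dim.
      std::printf("floor, pad_r += stride: %d\n",
                  FloorOutput(in, k, pad, pad + s, s));              // 3
      return 0;
    }

Since ceiling division exceeds floor division by at most one, adding a single
stride of padding per mismatched spatial dimension is always sufficient.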