From cd43c4440ef5aec02c3eb242b6f0cade225bb0e2 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Fri, 29 Nov 2019 05:06:07 +0100 Subject: [PATCH] [MKL-DNN] LRN and Pool2d (FWD) NHWC support (#21375) --- .../fluid/framework/data_layout_transform.cc | 16 +++++++-- .../fluid/framework/data_layout_transform.h | 7 ++-- paddle/fluid/framework/data_transform.cc | 11 ++++++- paddle/fluid/framework/executor.cc | 1 + paddle/fluid/framework/operator.h | 5 +++ .../fluid/operators/controlflow/fetch_op.cc | 8 +++-- paddle/fluid/operators/lrn_op.cc | 28 ++++++++++++---- .../operators/mkldnn/dequantize_mkldnn_op.cc | 1 + .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 2 ++ paddle/fluid/operators/pool_op.cc | 33 +++++++++++++++---- paddle/fluid/operators/pool_op.h | 4 +++ paddle/fluid/platform/device_context.cc | 12 +++++++ paddle/fluid/platform/device_context.h | 3 ++ paddle/fluid/platform/mkldnn_reuse.h | 2 +- .../unittests/mkldnn/test_lrn_mkldnn_op.py | 7 +--- .../unittests/mkldnn/test_pool2d_mkldnn_op.py | 10 +----- 16 files changed, 111 insertions(+), 39 deletions(-) diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 7fe8e9eb73..a236ff07fe 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -127,13 +127,17 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, "TransDataLayoutFromMKLDNN only supports transform from MKLDNN to " "non-MKLDNN"); - innerTransDataLayoutFromMKLDNN(in_layout, out_layout, in, out, place); +#ifdef PADDLE_WITH_MKLDNN + innerTransDataLayoutFromMKLDNN(in_layout, + paddle::platform::get_cur_paddle_data_layout(), + in, out, place); +#endif } +#ifdef PADDLE_WITH_MKLDNN void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, const Tensor& in, Tensor* out, platform::Place place) { -#ifdef PADDLE_WITH_MKLDNN PADDLE_ENFORCE_NE(in.format(), MKLDNNMemoryFormat::format_undef, platform::errors::InvalidArgument( "Input tensor format is invalid. Input tensor should " @@ -185,11 +189,17 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, } else { out->ShareDataWith(in); } + // For exepected NHWC data format we need to reshape the Output tensor + // As MKL-DNN description was in NCHW and paddle is expecting NHWC + if (out_layout == DataLayout::kNHWC) { + std::rotate(out_tz.begin() + 1, out_tz.begin() + 2, out_tz.end()); + out->Resize(framework::make_ddim(out_tz)); + } out->set_layout(out_layout); // reset format since the out tensor will be feed to non-MKLDNN OPkernel out->set_format(MKLDNNMemoryFormat::format_undef); -#endif } +#endif } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index d67ea1e530..6a6a531ea4 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -66,16 +66,15 @@ inline MKLDNNDataType ToMKLDNNDataType(proto::VarType::Type type) { return MKLDNNDataType::data_undef; } +void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, + const Tensor& in, Tensor* out, + platform::Place place); #endif void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, const Tensor& in, Tensor* out); -void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, - const Tensor& in, Tensor* out, - platform::Place place); - std::vector GetAxis(const DataLayout& from, const DataLayout& to); void TransDataLayout(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 8287222450..7fb3f1f7d7 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" #ifdef PADDLE_WITH_MKLDNN +#include #include "paddle/fluid/platform/mkldnn_helper.h" #endif @@ -54,8 +55,16 @@ void TransformData(const OpKernelType &expected_kernel_type, auto out_format = platform::MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin)); - out.ShareDataWith(input_tensor); + // For NHWC data we need reshape of tensors as MKL-DNN + // is expecting NHWC dims description order + if (lin == DataLayout::kNHWC) { + auto nchw_dims = paddle::framework::vectorize(out.dims()); + std::rotate(nchw_dims.begin() + 1, nchw_dims.end() - 1, + nchw_dims.end()); + out.Resize(framework::make_ddim(nchw_dims)); + paddle::platform::set_cur_paddle_data_layout(lin); + } out.set_layout(DataLayout::kMKLDNN); out.set_format(out_format); #endif diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 7a20e2b529..26ee5131c1 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -103,6 +103,7 @@ Executor::~Executor() { platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(place_); dev_ctx->ResetBlobMap(); + platform::set_cur_paddle_data_layout(paddle::framework::DataLayout::kNCHW); } #endif } diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 7380906588..08a9147acc 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -470,6 +470,11 @@ class OperatorWithKernel : public OperatorBase { return g_all_op_kernels; } + bool IsMKLDNNType() const { + return ((this->kernel_type_) && (this->kernel_type_->data_layout_ == + framework::DataLayout::kMKLDNN)); + } + bool SupportGPU() const override { auto& op_kernels = OperatorWithKernel::AllOpKernels().at(type_); return std::any_of(op_kernels.begin(), op_kernels.end(), diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index 93b4b2d474..073fdaf360 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -56,16 +56,20 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? if (src_item.IsInitialized() && src_item.numel() > 0) { +#ifdef PADDLE_WITH_MKLDNN // Conversion from MKL-DNN to Paddle if (src_item.layout() == framework::DataLayout::kMKLDNN) { framework::Tensor out; framework::innerTransDataLayoutFromMKLDNN( - src_item.layout(), framework::DataLayout::kNCHW, src_item, &out, - platform::CPUPlace()); + src_item.layout(), paddle::platform::get_cur_paddle_data_layout(), + src_item, &out, platform::CPUPlace()); TensorCopySync(out, platform::CPUPlace(), &dst_item); } else { TensorCopySync(src_item, platform::CPUPlace(), &dst_item); } +#else + TensorCopySync(src_item, platform::CPUPlace(), &dst_item); +#endif } else { // Not copy, if the src tensor is empty. dst_item.clear(); diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index cfded0370b..6aaf3ab18b 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -193,12 +193,6 @@ class LRNOp : public framework::OperatorWithKernel { #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { - // TODO(jczaja): Add support for NHWC - const std::string data_format = ctx.Attr("data_format"); - PADDLE_ENFORCE_NE( - data_format, "NHWC", - platform::errors::Unimplemented( - "LRN MKLDNN does not support NHWC data format yet")); library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; } @@ -207,6 +201,28 @@ class LRNOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(), layout_, library_); } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { +#ifdef PADDLE_WITH_MKLDNN + if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + auto dl = framework::StringToDataLayout(data_format); + // Some models may have intentionally set "AnyLayout" for pool + // op. Treat this as NCHW (default data_format value) + if (dl != framework::DataLayout::kAnyLayout) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), dl); + } + } +#endif + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } }; template diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 9a79feb8a5..41a641659c 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -102,6 +102,7 @@ class DeQuantOpKernel : public framework::OpKernel { pipeline.push_back(*reorder_p); stream(stream::kind::eager).submit(pipeline).wait(); + output->set_layout(DataLayout::kMKLDNN); output->set_format(GetMKLDNNFormat(*dst_memory)); } }; diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 29301e6c67..c1e215ad74 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -62,6 +62,8 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { std::shared_ptr lrn_p; if (is_test == false) { workspace_memory = handler.AcquireWorkspaceMemory(mid); + mid->set_layout(framework::DataLayout::kMKLDNN); + mid->set_format(platform::GetMKLDNNFormat(*workspace_memory)); lrn_p = handler.AcquireForwardPrimitive(*src_memory, *workspace_memory, *dst_memory); } else { diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 069f2339f4..ec166ab74f 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -88,7 +88,10 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const { ksize.size(), strides.size(), framework::make_ddim(ksize), framework::make_ddim(strides)); - const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); + // MKL-DNN Kernels are using NCHW order of dims description + // so we ignore data_format consideration for MKL-DNN kernel + const bool channel_last = (this->IsMKLDNNType() == false) && + (data_format == "NHWC" || data_format == "NDHWC"); // update paddings if "SAME" or global_pooling framework::DDim data_dims; @@ -146,12 +149,6 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { - // TODO(jczaja): Add support for NHWC - const std::string data_format = ctx.Attr("data_format"); - PADDLE_ENFORCE_NE( - data_format, "NHWC", - platform::errors::Unimplemented( - "Pool MKLDNN grad does not support NHWC data format yet")); library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; } @@ -162,6 +159,28 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( layout_, library_); } +framework::OpKernelType PoolOp::GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const { +#ifdef PADDLE_WITH_MKLDNN + if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN)) { + auto attrs = Attrs(); + auto ar = paddle::framework::AttrReader(attrs); + const std::string data_format = ar.Get("data_format"); + auto dl = framework::StringToDataLayout(data_format); + // Some models may have intentionally set "AnyLayout" for pool + // op. Treat this as NCHW (default data_format value) + if (dl != framework::DataLayout::kAnyLayout) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), dl); + } + } +#endif + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); +} + void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) must not be null."); PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true, diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index 53551a8c50..c37325459b 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -35,6 +35,10 @@ class PoolOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override; + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const; }; class PoolOpGrad : public framework::OperatorWithKernel { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 04a52a5e9c..d50d494a93 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -397,6 +397,10 @@ thread_local std::string cur_input_shape_str = ""; // the cache capacity of different input shapes for MKLDNN. // Default 1 means fixed input shape, not dynamic shape. thread_local int cur_input_shape_cache_capacity = 1; +// Recently registered data_format. This is needed to +// know for converting MKL-DNN Tensor to non MKL-DNN +thread_local paddle::framework::DataLayout cur_paddle_data_layout = + paddle::framework::DataLayout::kNCHW; } // namespace void set_cur_mkldnn_session_id(size_t sid) { cur_mkldnn_session_id = sid; } @@ -408,6 +412,14 @@ void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity) { cur_input_shape_cache_capacity = input_shape_cache_capacity; } +void set_cur_paddle_data_layout(framework::DataLayout dl) { + cur_paddle_data_layout = dl; +} + +framework::DataLayout get_cur_paddle_data_layout(void) { + return cur_paddle_data_layout; +} + void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 2de2937b98..344ac69f97 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -30,6 +30,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "mkldnn.hpp" +#include "paddle/fluid/framework/data_layout.h" #endif #include @@ -290,6 +291,8 @@ void set_cur_mkldnn_session_id(size_t); size_t get_cur_mkldnn_session_id(void); void set_cur_input_shape_str(std::string input_shape_str); void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity); +void set_cur_paddle_data_layout(framework::DataLayout); +framework::DataLayout get_cur_paddle_data_layout(void); class MKLDNNDeviceContext : public CPUDeviceContext { public: diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 6fe8332c6c..2b641504cd 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -502,7 +502,7 @@ class LRNMKLDNNHandler std::shared_ptr AcquireWorkspaceMemory( framework::Tensor* workspace) { T* ptr = workspace->mutable_data( - this->place_, this->fwd_pd_->dst_primitive_desc().get_size()); + this->place_, this->fwd_pd_->workspace_primitive_desc().get_size()); return this->AcquireMemoryFromPrimitive( this->fwd_pd_->workspace_primitive_desc(), ptr, "@wrk_mem_p"); } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py index 9478719578..e9b85dd757 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py @@ -55,16 +55,11 @@ class TestLRNMKLDNNOpWithIsTest(TestLRNMKLDNNOp): self.assertRaises(AttributeError, check_raise_is_test) -# TODO(jczaja): Once mkl-dnn integration support NHWC input -# then those tests should be changed to actual functional positive tests class TestLRNMKLDNNOpNHWC(TestLRNMKLDNNOp): def init_test_case(self): self.data_format = 'NHWC' - def test_check_output(self): - pass - - # Grad tests both FWD and BWD ops kernels creation + #TODO(jczaja): Add grad support def test_check_grad_normal(self): with self.assertRaises(fluid.core_avx.EnforceNotMet): self.check_grad(['X'], 'Out', max_relative_error=0.01) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index b8403bc3c6..f22ca516ac 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -141,9 +141,6 @@ class TestAsymPadValid(TestAsymPad): self.padding_algorithm = "VALID" -# Designed to Fail -# TODO(jczaja): Once mkl-dnn integration support NHWC input -# then those tests should be changed to actual functional positive tests class TestAsymPadValidNHWC(TestAsymPadValid): def init_data_format(self): self.data_format = "NHWC" @@ -151,12 +148,7 @@ class TestAsymPadValidNHWC(TestAsymPadValid): def init_shape(self): self.shape = [2, 7, 7, 3] - def test_check_output(self): - pass - - # Grad tests both FWD and BWD ops kernels creation - # GetExpectedKernelType should throw an exception on lack of support - # to NHWC inputs in pool mkldnn kernel + #TODO(jczaja): Add Grad NHWC support def test_check_grad(self): with self.assertRaises(fluid.core_avx.EnforceNotMet): super(TestAsymPadValidNHWC, self).test_check_grad() -- GitLab