diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc
index 22a2847c1d834fee9fc3012957ddfc70130e41d3..0946cb121670e97025031ab51d7d47bf21f8f921 100644
--- a/paddle/fluid/framework/data_transform.cc
+++ b/paddle/fluid/framework/data_transform.cc
@@ -63,9 +63,13 @@ void TransformData(const OpKernelType &expected_kernel_type,
       out.ShareDataWith(input_tensor);
       // For NHWC data we need reshape of tensors as MKL-DNN
       // is expecting NHWC dims description order
-      platform::MatchShapeToLayout(&out, lin, lout);
-      paddle::platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
-          lin);
+      if (lin == DataLayout::kNHWC) {
+        platform::MatchShapeToLayout(&out, lin, lout);
+        // We register only NHWC, assuming the model is layout-consistent,
+        // i.e. either all NHWC or all NCHW
+        paddle::platform::MKLDNNDeviceContext::tls()
+            .set_cur_paddle_data_layout(lin);
+      }
       out.set_layout(DataLayout::kMKLDNN);
       out.set_format(out_format);
     } else {
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index e5ba46f312897b78dacd60701b71ca031cb43531..270c9a9c4f6b50cce9486938a33737fea9ff1a52 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -128,6 +128,26 @@ class ActivationOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     return GetKernelType(ctx, *this, "X");
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const {
+#ifdef PADDLE_WITH_MKLDNN
+    // When activation is the first oneDNN op (there was some non-oneDNN op
+    // previously) then we also need to rotate shape NHWC -> NCHW
+    if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+        (tensor.layout() != framework::DataLayout::kMKLDNN) &&
+        paddle::platform::MKLDNNDeviceContext::tls()
+                .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC) {
+      return framework::OpKernelType(expected_kernel_type.data_type_,
+                                     tensor.place(),
+                                     framework::DataLayout::kNHWC);
+    }
+#endif
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };
 
 class ActivationOpInferVarType
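Both hunks above hinge on the same primitive: describing an NHWC tensor in the NCHW dims order that oneDNN expects, implemented as a single `std::rotate` over the dims vector. Below is a minimal standalone sketch of that rotation (illustration only, not Paddle's actual `MatchShapeToLayout`):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Move the channel dim from the back (NHWC) to position 1 (NCHW order),
// e.g. {1, 4, 8, 512} -> {1, 512, 4, 8}.
std::vector<int64_t> NhwcToNchw(std::vector<int64_t> dims) {
  std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
  return dims;
}

int main() {
  assert((NhwcToNchw({1, 4, 8, 512}) == std::vector<int64_t>{1, 512, 4, 8}));
  return 0;
}
```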
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index e18ff9727b273acac93beb23dc920eb238d4d531..d726bf0d0b5ab69da644c536c71db5fd2e872124 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -101,9 +101,37 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     std::vector<int> x_dims_array(max_dim);
     std::vector<int> y_dims_array(max_dim);
     std::vector<int> out_dims_array(max_dim);
+#ifdef PADDLE_WITH_MKLDNN
+    // (jczaja): Broadcasting of dims has to be done on Paddle shapes (NHWC)
+    // if model is using NHWC.
+    bool should_rotate =
+        ctx->IsRunMKLDNNKernel() &&
+        (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+         framework::DataLayout::kNHWC);
+    if (should_rotate) {
+      // Pick the bigger shape and rotate this one
+      bool x_over_y = (x_dims.size() > y_dims.size());
+      auto vdims = x_over_y ? framework::vectorize(x_dims)
+                            : framework::vectorize(y_dims);
+      std::rotate(vdims.begin() + 1, vdims.begin() + 2, vdims.end());
+      if (x_over_y) {
+        x_dims = framework::make_ddim(vdims);
+      } else {
+        y_dims = framework::make_ddim(vdims);
+      }
+    }
+#endif
+
     GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
                            y_dims_array.data(), out_dims_array.data(), max_dim,
                            axis);
+#ifdef PADDLE_WITH_MKLDNN
+    // Now rotate shape back if needed (NHWC -> NCHW)
+    if (should_rotate) {
+      std::rotate(out_dims_array.begin() + 1, out_dims_array.end() - 1,
+                  out_dims_array.end());
+    }
+#endif
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims_array));
     // to do
     ctx->ShareLoD("X", /*->*/ "Out");
@@ -133,6 +161,21 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       return framework::OpKernelType(tensor.type(), tensor.place(),
                                      tensor.layout());
     } else {
+#ifdef PADDLE_WITH_MKLDNN
+      // When elementwise is the first oneDNN op (there was some non-oneDNN op
+      // previously) then we also need to rotate shape NHWC -> NCHW
+      if ((expected_kernel_type.data_layout_ ==
+           framework::DataLayout::kMKLDNN) &&
+          (tensor.layout() != framework::DataLayout::kMKLDNN) &&
+          paddle::platform::MKLDNNDeviceContext::tls()
+                  .get_cur_paddle_data_layout() ==
+              framework::DataLayout::kNHWC) {
+        return framework::OpKernelType(expected_kernel_type.data_type_,
+                                       tensor.place(),
+                                       framework::DataLayout::kNHWC);
+      }
+#endif
       return framework::OpKernelType(expected_kernel_type.data_type_,
                                      tensor.place(), tensor.layout());
     }
diff --git a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake
index 232626df02e5045c5fe66395255553d8522aa411..c471ba62f609b32578323332f76027eaa0c35141 100644
--- a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake
+++ b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake
@@ -1,2 +1,2 @@
-cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op pooling transpose_op scope device_context enforce executor)
+cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op activation_op pooling transpose_op scope device_context enforce executor)
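The `InferShape` hunk above rotates the larger operand's dims before `GetBroadcastDimsArrays` so broadcasting runs on the NHWC view, then rotates the result back to NCHW order. The standalone sketch below, using assumed example shapes rather than Paddle internals, checks that the forward and backward rotations are exact inverses:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Dims registered in NCHW order while the model runs NHWC (assumed example).
  std::vector<int64_t> dims = {2, 3, 4, 5};
  // Forward rotation used before GetBroadcastDimsArrays: NCHW -> NHWC,
  // i.e. move the channel dim at position 1 to the back.
  std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
  assert((dims == std::vector<int64_t>{2, 4, 5, 3}));
  // ... broadcasting against the smaller operand would happen here ...
  // Backward rotation applied to the broadcast result: NHWC -> NCHW.
  std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
  assert((dims == std::vector<int64_t>{2, 3, 4, 5}));
  return 0;
}
```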
diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
index e7caeef85f5f95859096c2d6edb494106b0a0f93..8ee2519ac2b963f8953133df227445b544fd71e4 100644
--- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
+++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
@@ -27,6 +27,8 @@
 USE_OP(pool2d);
 USE_OP_DEVICE_KERNEL(pool2d, MKLDNN);
+USE_OP(relu);
+USE_OP_DEVICE_KERNEL(relu, MKLDNN);
 USE_OP(transpose);
 USE_OP_DEVICE_KERNEL(transpose, MKLDNN);
@@ -90,5 +92,63 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) {
                         "Computed shape does not match expected shape"));
 }
 
+TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
+  framework::DDim dims({1, 4, 8, 512});           // NHWC shape
+  framework::DDim expected_dims({1, 512, 3, 7});  // NCHW expected shape
+  platform::CPUPlace p;
+  framework::Scope scope;
+
+  InputVars input_name = {"x",
+                          scope.Var("x")->GetMutable<framework::LoDTensor>()};
+  // Initialize input data
+  std::uniform_real_distribution<float> dist(static_cast<float>(10.0),
+                                             static_cast<float>(20.0));
+  std::mt19937 engine;
+  size_t numel = static_cast<size_t>(framework::product(dims));
+  input_name.tensor->Resize(dims);
+  auto data_ptr = input_name.tensor->mutable_data<float>(p);
+  for (size_t i = 0; i < numel; ++i) {
+    data_ptr[i] = dist(engine);
+  }
+
+  scope.Var("y")->GetMutable<framework::LoDTensor>();
+  scope.Var("u")->GetMutable<framework::LoDTensor>();
+  auto *z = scope.Var("z")->GetMutable<framework::LoDTensor>();
+
+  auto &pool = platform::DeviceContextPool::Instance();
+
+  // Build pool2d(oneDNN) followed by relu(plain CPU) followed by
+  // relu(oneDNN). The second relu should rotate the shape to NCHW
+
+  auto ksize = std::vector<int>(2, 2);
+  auto op_pool = framework::OpRegistry::CreateOp(
+      "pool2d", {{"X", {"x"}}}, {{"Out", {"y"}}},
+      {{"pooling_type", {std::string("max")}},
+       {"ksize", {ksize}},
+       {"data_format", {std::string("NHWC")}},
+       {"use_mkldnn", {true}}});
+
+  auto axis = std::vector<int>(4, 0);
+  axis[1] = 2;
+  axis[2] = 3;
+  axis[3] = 1;
+  auto op_relu1 = framework::OpRegistry::CreateOp(
+      "relu", {{"X", {"y"}}}, {{"Out", {"u"}}},
+      {{"axis", {axis}}, {"use_mkldnn", {false}}});
+
+  auto op_relu2 = framework::OpRegistry::CreateOp(
+      "relu", {{"X", {"u"}}}, {{"Out", {"z"}}}, {{"use_mkldnn", {true}}});
+
+  op_pool->Run(scope, p);
+  op_relu1->Run(scope, p);
+  op_relu2->Run(scope, p);
+
+  pool.Get(p)->Wait();
+
+  // Verify shape of output
+  PADDLE_ENFORCE_EQ(z->dims(), expected_dims,
+                    platform::errors::InvalidArgument(
+                        "Computed shape does not match expected shape"));
+}
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h
index cd3f7e7067831799531d3f40b2e169b1fe035ff4..47c9d1dba91b994747465d08a7a26b74bf45bc6d 100644
--- a/paddle/fluid/operators/transfer_layout_op.h
+++ b/paddle/fluid/operators/transfer_layout_op.h
@@ -67,9 +67,11 @@ class TransferLayoutFunctor {
       out_tensor.ShareDataWith(in_tensor);
       // For NHWC data we need reshape of tensors as MKL-DNN
       // is expecting NHWC dims description order
-      platform::MatchShapeToLayout(&out_tensor, in_layout, out_layout);
-      paddle::platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
-          in_layout);
+      if (in_layout == DataLayout::kNHWC) {
+        platform::MatchShapeToLayout(&out_tensor, in_layout, out_layout);
+        paddle::platform::MKLDNNDeviceContext::tls()
+            .set_cur_paddle_data_layout(in_layout);
+      }
       out_tensor.set_layout(DataLayout::kMKLDNN);
       out_tensor.set_format(out_format);
     } else {
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 9aadd36c2e8aca50a9c54cf086e3563255f51f3b..69fe068a9b4a8816d9dcef73e4a4597217e92792 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -651,11 +651,21 @@ class BinaryMKLDNNHandler
       std::vector<int64_t> dims1_ex(rankdiff, 1);
       dims1_ex.insert(next(dims1_ex.begin(), (axis == -1 ? rankdiff : axis)),
                       src_y_tz.begin(), src_y_tz.end());
+      // For broadcasting with NHWC we need to rotate the extended shape
+      if (MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+          framework::DataLayout::kNHWC) {
+        std::rotate(dims1_ex.begin() + 1, dims1_ex.end() - 1, dims1_ex.end());
+      }
       src1_md = src1_md.reshape(dims1_ex);
     } else if (rankdiff < 0) {  // First input is of smaller rank than second
       std::vector<int64_t> dims0_ex(-rankdiff, 1);
       dims0_ex.insert(next(dims0_ex.begin(), (axis == -1 ? -rankdiff : axis)),
                       src_x_tz.begin(), src_x_tz.end());
+      // For broadcasting with NHWC we need to rotate the extended shape
+      if (MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+          framework::DataLayout::kNHWC) {
+        std::rotate(dims0_ex.begin() + 1, dims0_ex.end() - 1, dims0_ex.end());
+      }
       src0_md = src0_md.reshape(dims0_ex);
     }
     const auto dst_md = memory::desc(dst_tz, platform::MKLDNNGetDataType<T>(),
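In the `BinaryMKLDNNHandler` hunk above, the lower-rank operand's dims are first padded with leading 1s (`rankdiff` of them) and then, when the model is NHWC, rotated so the channel dim lands at position 1 of the NCHW-order oneDNN descriptor. A minimal sketch of that sequence, assuming a hypothetical rank-4 vs. rank-1 broadcast with 512 channels and `axis == -1`:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

int main() {
  // Rank-4 first operand vs. rank-1 second operand => rankdiff == 3.
  std::vector<int64_t> src_y_tz = {512};
  const int rankdiff = 3;
  // Pad with leading 1s, inserting the real dims after the padding
  // (the axis == -1 case): {1, 1, 1} -> {1, 1, 1, 512}.
  std::vector<int64_t> dims1_ex(rankdiff, 1);
  dims1_ex.insert(std::next(dims1_ex.begin(), rankdiff), src_y_tz.begin(),
                  src_y_tz.end());
  assert((dims1_ex == std::vector<int64_t>{1, 1, 1, 512}));
  // Under NHWC, rotate so the channel sits at position 1 of the
  // NCHW-order descriptor: {1, 1, 1, 512} -> {1, 512, 1, 1}.
  std::rotate(dims1_ex.begin() + 1, dims1_ex.end() - 1, dims1_ex.end());
  assert((dims1_ex == std::vector<int64_t>{1, 512, 1, 1}));
  return 0;
}
```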