From 2cff0e8a7f7c5561bd0618b800aec36f204d7c01 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Fri, 4 Nov 2022 10:13:34 +0100
Subject: [PATCH] slice & mul & requantize tensors to use mem_desc (#47617)

* slice & mul & requantize

* - Fix to requantize test
---
 .../fluid/operators/mkldnn/mul_mkldnn_op.cc   | 20 +++++++++++---------
 .../operators/mkldnn/requantize_mkldnn_op.cc  | 13 +++++++++----
 paddle/fluid/operators/slice_op.cc            | 18 ++++++------------
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
index 0dcfe4d61cb..2622dfb4eb2 100644
--- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
@@ -199,15 +199,17 @@ class MulPrimitiveFactory {
                           const ExecutionContext &ctx) {
     Tensor x_tmp;
     Tensor data_matrix;
-    MKLDNNMemoryFormat src_fmt = data->format();
-    MKLDNNMemoryFormat dst_fmt;
-    auto src_mdesc = CreateMemDescriptor<T>(data, src_fmt);
-
-    if ((data->dims().size() == 4 &&
-         src_fmt != (dst_fmt = MKLDNNMemoryFormat::nchw)) ||
-        (data->dims().size() == 5 &&
-         src_fmt != (dst_fmt = MKLDNNMemoryFormat::ncdhw))) {
-      auto dst_mdesc = CreateMemDescriptor<T>(data, dst_fmt);
+    // This code enforces a plain (non-blocked) memory arrangement
+    // so that the Tensor can be flattened (reduced in dimensionality) later
+    auto src_mdesc = data->mem_desc();
+    auto dst_mdesc =
+        data->dims().size() >= 4
+            ? (data->dims().size() == 5
+                   ? CreateMemDescriptor<T>(data, MKLDNNMemoryFormat::ncdhw)
+                   : CreateMemDescriptor<T>(data, MKLDNNMemoryFormat::nchw))
+            : src_mdesc;
+
+    if (src_mdesc != dst_mdesc) {
       x_tmp.mutable_data<T>(ctx.GetPlace(), data->memory_size());
 
       Reorder(src_mdesc,
diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
index 23409db02be..b1a323e7ab5 100644
--- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <iterator>  // NOLINT
 #include "dnnl.hpp"  // NOLINT
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
@@ -85,15 +86,19 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     const T* input_data = input->data<T>();
 
     if (reorder_p == nullptr) {
-      auto dst_tz = phi::vectorize(output->dims());
       auto src_dt = framework::ToMKLDNNDataType(
           framework::TransToProtoVarType(input->dtype()));
       auto dst_dt = with_shift ? framework::MKLDNNDataType::u8 : src_dt;
 
-      auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, input->format());
       src_memory = std::make_shared<dnnl::memory>(
-          src_md, engine, to_void_cast<T>(input_data));
-      auto dst_md = platform::MKLDNNMemDesc({dst_tz}, dst_dt, input->format());
+          input->mem_desc(), engine, to_void_cast<T>(input_data));
+
+      auto xstrides = input->mem_desc().data.format_desc.blocking.strides;
+
+      std::vector<dnnl_dim_t> vstrides(xstrides,
+                                       xstrides + input->mem_desc().data.ndims);
+
+      auto dst_md = dnnl::memory::desc({src_tz}, dst_dt, vstrides);
 
       dnnl::primitive_attr attri;
       int mask = 0;
diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc
index 71da14eae7f..38aefa3a4f2 100644
--- a/paddle/fluid/operators/slice_op.cc
+++ b/paddle/fluid/operators/slice_op.cc
@@ -162,11 +162,9 @@ class SliceOp : public framework::OperatorWithKernel {
       // reorders, because if blocked dimension is not divisible by 8 or
       // 16(depending on which blocking format is used) submemory cannot be
       // created, so in that scenario a fallback is needed
-      auto tmp_md = dnnl::memory::desc(
-          phi::vectorize(ctx.Input<Tensor>("Input")->dims()),
-          dnnl::memory::data_type::f32,
-          ctx.Input<Tensor>("Input")->format());
-      if (tmp_md.data.format_desc.blocking.inner_nblks == 0)
+      if (ctx.Input<Tensor>("Input")
+              ->mem_desc()
+              .data.format_desc.blocking.inner_nblks == 0)
         return framework::OpKernelType(input_data_type,
                                        ctx.GetPlace(),
                                        phi::DataLayout::kMKLDNN,
@@ -337,13 +335,9 @@ class SliceOpGrad : public framework::OperatorWithKernel {
       // reorders, because if blocked dimension is not divisible by 8 or
       // 16(depending on which blocking format is used) submemory cannot be
      // created, so in that scenario a fallback is needed
-      auto tmp_md = dnnl::memory::desc(
-          phi::vectorize(
-              ctx.Input<Tensor>(framework::GradVarName("Out"))
-                  ->dims()),
-          dnnl::memory::data_type::f32,
-          ctx.Input<Tensor>(framework::GradVarName("Out"))->format());
-      if (tmp_md.data.format_desc.blocking.inner_nblks == 0)
+      if (ctx.Input<Tensor>(framework::GradVarName("Out"))
+              ->mem_desc()
+              .data.format_desc.blocking.inner_nblks == 0)
         return framework::OpKernelType(input_data_type,
                                        ctx.GetPlace(),
                                        phi::DataLayout::kMKLDNN,
-- 
GitLab
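
The hunks above share a few oneDNN idioms that are easy to miss in diff form; the sketches below restate them as standalone code. They use the oneDNN 2.x C++ API, the generation Paddle builds against here (oneDNN 3.x removed the public `data` member these snippets read), and none of the helper names are part of the patch. First, the mul_mkldnn_op.cc hunk reorders a 4-D/5-D tensor to a plain nchw/ncdhw layout before it is flattened to a matrix. A minimal sketch of that pattern, assuming an f32 tensor of rank 4 or 5 and using the hypothetical helper name to_plain:

    #include <vector>

    #include "dnnl.hpp"

    // Reorder `src` to a plain nchw/ncdhw layout unless it is plain already.
    dnnl::memory to_plain(dnnl::memory src, dnnl::engine& eng,
                          dnnl::stream& strm) {
      auto src_md = src.get_desc();
      const int ndims = src_md.data.ndims;
      // Pick the plain tag matching the rank, as the patch does for 4-D/5-D.
      auto tag = ndims == 5 ? dnnl::memory::format_tag::ncdhw
                            : dnnl::memory::format_tag::nchw;
      std::vector<dnnl::memory::dim> dims(src_md.data.dims,
                                          src_md.data.dims + ndims);
      dnnl::memory::desc dst_md(dims, dnnl::memory::data_type::f32, tag);
      if (dst_md == src_md) return src;  // already plain, nothing to do
      dnnl::memory dst(dst_md, eng);     // allocates a plain buffer
      dnnl::reorder(src, dst).execute(strm, src, dst);
      strm.wait();
      return dst;
    }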
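
Second, the requantize hunk builds the destination descriptor from the source tensor's own strides, so the subsequent reorder changes only the data type while the memory layout is preserved. A sketch of that construction, with the hypothetical helper name make_dst_desc_like:

    #include <vector>

    #include "dnnl.hpp"

    // Same shape and strides as `src_md`, different data type. Strides are
    // read from the C struct behind the C++ descriptor, exactly as the
    // patch does with input->mem_desc().data.format_desc.blocking.strides.
    dnnl::memory::desc make_dst_desc_like(const dnnl::memory::desc& src_md,
                                          dnnl::memory::data_type dst_dt) {
      const int ndims = src_md.data.ndims;
      const auto* strides = src_md.data.format_desc.blocking.strides;
      std::vector<dnnl::memory::dim> vstrides(strides, strides + ndims);
      std::vector<dnnl::memory::dim> dims(src_md.data.dims,
                                          src_md.data.dims + ndims);
      return dnnl::memory::desc(dims, dst_dt, vstrides);
    }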
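
Finally, the slice_op.cc change reduces the fallback test to one predicate: a descriptor with inner_nblks == 0 describes a plain (non-blocked) layout, and only then can the submemory that slice needs always be created; for blocked layouts, a blocked dimension that is not divisible by the block size (8 or 16) makes the submemory impossible, hence the fallback. A one-line sketch:

    #include "dnnl.hpp"

    // True when `md` describes a plain (non-blocked) layout.
    bool is_plain_layout(const dnnl::memory::desc& md) {
      return md.data.format_desc.blocking.inner_nblks == 0;
    }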