From e2a3a6f73a6766962bf52f67698d354c69e6b25e Mon Sep 17 00:00:00 2001
From: jakpiase <62569058+jakpiase@users.noreply.github.com>
Date: Sat, 22 May 2021 05:09:19 +0200
Subject: [PATCH] Added oneDNN matmul grad BF16/FP32 kernel (#32968)

* added support for most matmul cases
* added more functionality
* full functionality of matmul op, fp32 only
* added bf16 tests and functionality
* added formatting
* changes after review
* minor change
* added reviewers suggestions
---
 paddle/fluid/operators/matmul_op.cc           |  15 +
 .../operators/mkldnn/matmul_mkldnn_op.cc      | 300 +++++++++++++++++-
 .../mkldnn/test_matmul_bf16_mkldnn_op.py      | 101 ++++--
 .../unittests/mkldnn/test_matmul_mkldnn_op.py |  99 +++++-
 4 files changed, 473 insertions(+), 42 deletions(-)

diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc
index c12aecc9ba5..e226ab53288 100644
--- a/paddle/fluid/operators/matmul_op.cc
+++ b/paddle/fluid/operators/matmul_op.cc
@@ -825,6 +825,21 @@ class MatMulOpGrad : public framework::OperatorWithKernel {
       context->SetOutputDim(y_grad_name, y_dims);
     }
   }
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    auto input_data_type =
+        OperatorWithKernel::IndicateOrPromoteVarDataTypes(ctx, "X", "Y");
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  }
 };
 
 template <typename T>
diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
index 3ef9d88e4e9..2b3496359b0 100644
--- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
 
 namespace paddle {
 namespace platform {
@@ -37,6 +37,111 @@ using platform::MKLDNNGetDataType;
 using platform::to_void_cast;
 using Tensor = framework::Tensor;
 
+// Reshape a rank-3 tensor from P x M x N to (P * M) x N.
+// Identity op if the tensor is not of rank 3.
+static framework::Tensor FoldOuterDims(const Tensor& input) {
+  auto output = input;
+  auto in_dims = input.dims();
+  if (in_dims.size() == 3) {
+    output.Resize({in_dims[0] * in_dims[1], in_dims[2]});
+  }
+  return output;
+}
+
+// Reshape a rank-3 tensor from P x M x N to M x (P * N).
+// (Warning: This requires transposing data and writes into new memory.)
+// Identity op if the tensor is not of rank 3.
+template <typename T>
+static framework::Tensor FoldFirstAndLastDims(
+    const MKLDNNDeviceContext& dev_ctx, const Tensor* input) {
+  auto input_dims = framework::vectorize(input->dims());
+  if (input_dims.size() != 3) {
+    return *input;
+  }
+
+  framework::Tensor output;
+  output.Resize({input_dims[1], input_dims[0], input_dims[2]});
+
+  auto output_dims = framework::vectorize(output.dims());
+
+  memory::data_type input_type = framework::ToMKLDNNDataType(input->type());
+  std::string key = platform::CreateKey(dev_ctx, input_dims, input->format(),
+                                        input->format(), input_type);
+  platform::ReorderMKLDNNHandler reorder_handler(output_dims, input->type(),
+                                                 input_type, dev_ctx,
+                                                 dev_ctx.GetEngine(), key);
+
+  auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+      memory::format_tag::abc, platform::to_void_cast(input->data<T>()));
+  auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+      &output, memory::format_tag::bac, dev_ctx.GetPlace());
+  auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p,
+                                                  reorder_dst_memory_p);
+
+  platform::RecordEvent record_reorder("int_reorder",
+                                       platform::EventRole::kUniqueOp);
+
+  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+  reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
+  astream.wait();
+
+  output.Resize({input_dims[1], input_dims[0] * input_dims[2]});
+  return output;
+}
+
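For intuition, the two folding helpers above flatten a broadcast, rank-3 operand into a 2-D matrix before the gradient matmul. A rough numpy sketch of the same reshapes (the function names here are illustrative, not part of the patch):

    import numpy as np

    def fold_outer_dims(t):
        # P x M x N -> (P * M) x N; identity for non rank-3 inputs.
        return t.reshape(-1, t.shape[-1]) if t.ndim == 3 else t

    def fold_first_and_last_dims(t):
        # P x M x N -> M x (P * N); needs a real transpose (data copy),
        # which is what the oneDNN reorder in FoldFirstAndLastDims does.
        if t.ndim != 3:
            return t
        p, m, n = t.shape
        return t.transpose(1, 0, 2).reshape(m, p * n)

    t = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
    print(fold_outer_dims(t).shape)           # (6, 4)
    print(fold_first_and_last_dims(t).shape)  # (3, 8)
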
+template <typename T>
+class MatMulMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::matmul> {
+ public:
+  MatMulMKLDNNHandler(const MKLDNNDeviceContext& dev_ctx,
+                      const mkldnn::engine engine, platform::Place cpu_place,
+                      Tensor* x, bool trans_x, Tensor* y, bool trans_y,
+                      Tensor* out, float scale, const std::string& uniq_name)
+      : platform::MKLDNNHandlerT<T, dnnl::matmul>(
+            dev_ctx, engine, cpu_place,
+            platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
+                                uniq_name)) {
+    if (!this->isCached()) {
+      auto mat_dim_x = math::CreateMatrixDescriptor(x->dims(), 0, trans_x);
+      auto mat_dim_y = math::CreateMatrixDescriptor(y->dims(), 0, trans_y);
+
+      memory::dim x_bs = mat_dim_x.batch_size_;
+      memory::dim y_bs = mat_dim_y.batch_size_;
+
+      memory::dim out_bs = x_bs || y_bs ? std::max(x_bs, y_bs) : 1;
+      const memory::dim M = mat_dim_x.height_;
+      const memory::dim N = mat_dim_y.width_;
+      const memory::dim K = mat_dim_x.width_;
+
+      memory::dims x_dims = {x_bs > 0 ? x_bs : 1, M, K};
+      memory::dims y_dims = {y_bs > 0 ? y_bs : 1, K, N};
+      memory::dims out_dims = {out_bs, M, N};
+
+      memory::dims x_strides =
+          !trans_x ? memory::dims{M * K, K, 1} : memory::dims{M * K, 1, M};
+
+      memory::dims y_strides =
+          !trans_y ? memory::dims{N * K, N, 1} : memory::dims{N * K, 1, K};
+      memory::dims out_strides = memory::dims{M * N, N, 1};
+
+      auto x_md = memory::desc(x_dims, MKLDNNGetDataType<T>(), x_strides);
+      auto y_md = memory::desc(y_dims, MKLDNNGetDataType<T>(), y_strides);
+      auto out_md = memory::desc(out_dims, MKLDNNGetDataType<T>(), out_strides);
+
+      dnnl::primitive_attr attrs;
+      if (scale != 1.0f) attrs.set_output_scales(0, {scale});
+
+      this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md);
+    }
+  }
+
+  std::shared_ptr<memory> AcquireWeightsMemory(const Tensor* input) {
+    const T* input_data = input->data<T>();
+    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc(),
+                                            to_void_cast<T>(input_data),
+                                            "@weights_mem_p");
+  }
+};
+
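The handler above never materializes a transposed operand: when trans_x or trans_y is set, it simply describes the existing buffer with swapped strides. A small numpy analogue of that trick (as_strided is used purely for illustration):

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    M, K = 3, 4
    # Data laid out as K x M (i.e. the transposed operand), row-major.
    xt = np.arange(M * K, dtype=np.float32).reshape(K, M)

    # Read it as an M x K matrix by swapping strides instead of copying,
    # mirroring x_strides = {M * K, 1, M} in MatMulMKLDNNHandler.
    x_view = as_strided(xt, shape=(M, K),
                        strides=(xt.itemsize, M * xt.itemsize))

    assert np.array_equal(x_view, xt.T)
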
 template <typename T>
 constexpr bool IsInt8() {
   return std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
@@ -44,7 +149,7 @@ constexpr bool IsInt8() {
 
 template <typename T>
 constexpr bool IsBfloat16() {
-  return std::is_same<T, platform::bfloat16>::value;
+  return std::is_same<T, paddle::platform::bfloat16>::value;
 }
 
 // Get row matrix shape from a vector shape. If the rank of x_dim > 1, the
@@ -60,6 +165,60 @@ static framework::DDim ColumnMatrixDimsFromVector(
   return y_dim.size() > 1 ? y_dim : framework::make_ddim({y_dim[0], 1});
 }
 
+/**
+ * Reshape a tensor to 3-D or 2-D tensor by matrix descriptor.
+ *
+ * The shape would be [BatchSize, H, W] or [H, W].
+ * If transposed, `H,W` will be swapped.
+ */
+static void ReshapeTensorToMatrixSequence(
+    framework::Tensor* x, const math::MatDescriptor& descriptor) {
+  int64_t h, w;
+  h = descriptor.height_;
+  w = descriptor.width_;
+  if (descriptor.trans_) {
+    std::swap(w, h);
+  }
+  if (descriptor.batch_size_) {
+    x->Resize({descriptor.batch_size_, h, w});
+  } else {
+    x->Resize({h, w});
+  }
+}
+
+/**
+ * Reshape the x,y,out tensor to 3-D or 2-D tensor by matrix descriptor
+ * Out = matmul(x, y)
+ *
+ * This method will first calculate X,Y matrix sequence, and then calculate
+ * the out shape.
+ *
+ * Assume X = [BatchSize, H1, W1], Y = [BatchSize, H2, W2]
+ * The out = [BatchSize, H1, W2]
+ *
+ * If there is no batch size in `X` and `Y`, the out will be [H1, W2]
+ * If any of `X` and `Y` has batch size BatchSize, the out will have the
+ * BatchSize.
+ */
+static void ReshapeXYOutToMatrixSequence(framework::Tensor* x,
+                                         framework::Tensor* y,
+                                         framework::Tensor* out, bool trans_x,
+                                         bool trans_y) {
+  auto x_dim = RowMatrixDimsFromVector(x->dims());
+  auto y_dim = ColumnMatrixDimsFromVector(y->dims());
+  auto mat_dim_x = math::CreateMatrixDescriptor(x_dim, 0, trans_x);
+  auto mat_dim_y = math::CreateMatrixDescriptor(y_dim, 0, trans_y);
+  if (mat_dim_x.batch_size_ == 0 && mat_dim_y.batch_size_ == 0) {
+    out->Resize({mat_dim_x.height_, mat_dim_y.width_});
+  } else {
+    out->Resize({std::max(mat_dim_x.batch_size_, mat_dim_y.batch_size_),
+                 mat_dim_x.height_, mat_dim_y.width_});
+  }
+
+  ReshapeTensorToMatrixSequence(x, mat_dim_x);
+  ReshapeTensorToMatrixSequence(y, mat_dim_y);
+}
+
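The shape bookkeeping performed by ReshapeXYOutToMatrixSequence can be restated compactly; the helper below is a hypothetical numpy-style sketch that only computes the resulting output shape (vectors are promoted to row/column matrices exactly as RowMatrixDimsFromVector/ColumnMatrixDimsFromVector do):

    def matmul_out_shape(x_shape, y_shape, trans_x, trans_y):
        # Promote vectors: X becomes a row matrix, Y a column matrix.
        x = (1, x_shape[0]) if len(x_shape) == 1 else tuple(x_shape)
        y = (y_shape[0], 1) if len(y_shape) == 1 else tuple(y_shape)
        bx = x[0] if len(x) == 3 else 0
        by = y[0] if len(y) == 3 else 0
        h1 = x[-1] if trans_x else x[-2]   # rows of op(X)
        w2 = y[-2] if trans_y else y[-1]   # cols of op(Y)
        batch = max(bx, by)
        return (batch, h1, w2) if batch else (h1, w2)

    print(matmul_out_shape((17, 2, 3), (17, 3, 2), False, False))  # (17, 2, 2)
    print(matmul_out_shape((5,), (5,), False, False))              # (1, 1)
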
 template <typename XT, typename YT, typename OT>
 class MatMulFactory {
  public:
@@ -372,7 +531,7 @@ static void ExecuteMatMul(const ExecutionContext& ctx) {
 template <typename T>
 class DNNLMatMulKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
+  void Compute(const ExecutionContext& ctx) const override {
     if (ctx.HasAttr("head_number")) {
       PADDLE_ENFORCE_EQ(
           ctx.Attr<int>("head_number"), 1,
@@ -385,6 +544,137 @@ class DNNLMatMulKernel : public framework::OpKernel<T> {
     ExecuteMatMul<T, T, T>(ctx);
   }
 };
+
+template <typename T>
+class MatMulGradMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const ExecutionContext& ctx) const override {
+    if (ctx.HasAttr("head_number")) {
+      PADDLE_ENFORCE_EQ(
+          ctx.Attr<int>("head_number"), 1,
+          platform::errors::Unimplemented(
+              "DNNL matmul doesn't support multiple heads. Expected "
+              "head_number=1. But received `head_number` is %d",
+              ctx.Attr<int>("head_number")));
+    }
+    RunKernel<T>(ctx);
+  }
+
+ private:
+  void ExecuteMatMulGrad(const ExecutionContext& ctx,
+                         const MKLDNNDeviceContext& dev_ctx,
+                         const mkldnn::engine& engine, Tensor* x, bool trans_x,
+                         bool is_fold_init_dims_x, Tensor* y, bool trans_y,
+                         bool is_fold_init_dims_y, Tensor* out,
+                         int execution_number) const {
+    // gradient is calculated in a different way when broadcasting is used
+    bool need_combine = (x->dims().size() == 3 || y->dims().size() == 3) &&
+                        out->dims().size() == 2;
+
+    Tensor x_combined, y_combined;
+    if (!need_combine) {
+      x_combined = *x;
+      y_combined = *y;
+    } else {
+      x_combined = is_fold_init_dims_x ? FoldOuterDims(*x)
+                                       : FoldFirstAndLastDims<T>(dev_ctx, x);
+      y_combined = is_fold_init_dims_y ? FoldOuterDims(*y)
+                                       : FoldFirstAndLastDims<T>(dev_ctx, y);
+    }
+
+    MatMulMKLDNNHandler<T> handler(
+        dev_ctx, engine, ctx.GetPlace(), &x_combined, trans_x, &y_combined,
+        trans_y, out, ctx.Attr<float>("alpha"),
+        ctx.InputName(framework::GradVarName("Out")) +
+            std::to_string(execution_number));
+
+    const auto src_memory_p = handler.AcquireSrcMemory(&x_combined);
+    const auto weights_memory_p = handler.AcquireWeightsMemory(&y_combined);
+    const auto dst_memory_p = handler.AcquireDstMemory(out);
+
+    auto matmul_p = handler.AcquireForwardPrimitive();
+
+    std::unordered_map<int, memory> matmul_args = {
+        {DNNL_ARG_SRC, *src_memory_p},
+        {DNNL_ARG_WEIGHTS, *weights_memory_p},
+        {DNNL_ARG_DST, *dst_memory_p}};
+
+    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    matmul_p->execute(astream, matmul_args);
+    astream.wait();
+
+    out->set_layout(framework::DataLayout::kMKLDNN);
+    out->set_format(platform::GetMKLDNNFormat(dst_memory_p->get_desc().reshape(
+        framework::vectorize(out->dims()))));
+  }
+
+  template <typename Tout = T>
+  void RunKernel(const ExecutionContext& ctx) const {
+    const auto& dev_ctx =
+        ctx.template device_context<MKLDNNDeviceContext>();
+    const auto& onednn_engine = dev_ctx.GetEngine();
+
+    auto x = *ctx.Input<Tensor>("X");
+    auto y = *ctx.Input<Tensor>("Y");
+    auto dout = *ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+
+    bool transpose_x = ctx.Attr<bool>("transpose_X");
+    bool transpose_y = ctx.Attr<bool>("transpose_Y");
+
+    ReshapeXYOutToMatrixSequence(&x, &y, &dout, transpose_x, transpose_y);
+    framework::DDim dx_dims;
+    if (dx) {
+      dx_dims = dx->dims();
+      if (dx_dims != x.dims()) {
+        dx->Resize(x.dims());
+      }
+    }
+
+    framework::DDim dy_dims;
+    if (dy) {
+      dy_dims = dy->dims();
+      if (dy_dims != y.dims()) {
+        dy->Resize(y.dims());
+      }
+    }
+
+    if (transpose_x && transpose_y) {
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &y, true, true,
+                              &dout, true, false, dx, 0);
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &dout, true, true,
+                              &x, true, false, dy, 1);
+    } else if (transpose_x) {
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &y, false, false,
+                              &dout, true, false, dx, 0);
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &x, false, false,
+                              &dout, false, true, dy, 1);
+    } else if (transpose_y) {
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &dout, false, false,
+                              &y, false, true, dx, 0);
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &dout, true, true,
+                              &x, false, true, dy, 1);
+    } else {
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &dout, false, false,
+                              &y, true, false, dx, 0);
+      this->ExecuteMatMulGrad(ctx, dev_ctx, onednn_engine, &x, true, true,
+                              &dout, false, true, dy, 1);
+    }
+
+    if (dx) {
+      if (dx_dims != x.dims()) {
+        dx->Resize(dx_dims);
+      }
+    }
+    if (dy) {
+      if (dy_dims != y.dims()) {
+        dy->Resize(dy_dims);
+      }
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
@@ -394,3 +684,7 @@ REGISTER_OP_KERNEL(matmul, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::DNNLMatMulKernel<paddle::platform::bfloat16>,
                    ops::DNNLMatMulKernel<int8_t>,
                    ops::DNNLMatMulKernel<uint8_t>);
+
+REGISTER_OP_KERNEL(matmul_grad, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::MatMulGradMKLDNNKernel<float>,
+                   ops::MatMulGradMKLDNNKernel<paddle::platform::bfloat16>);
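The four branches of RunKernel above implement the usual gradients of Out = alpha * op(X) * op(Y); when one operand is rank-3 and its gradient target is rank-2, the batch dimension is first folded away with FoldOuterDims/FoldFirstAndLastDims. A plain numpy reference of the same formulas (illustrative only; it matches what the bf16 test below computes):

    import numpy as np

    def matmul_grads(x, y, dout, alpha, trans_x, trans_y):
        T = lambda a: np.swapaxes(a, -1, -2)
        if trans_x and trans_y:
            dx, dy = T(y) @ T(dout), T(dout) @ T(x)
        elif trans_x:
            dx, dy = y @ T(dout), x @ dout
        elif trans_y:
            dx, dy = dout @ y, T(dout) @ x
        else:
            dx, dy = dout @ T(y), T(x) @ dout
        return alpha * dx, alpha * dy

    x = np.random.rand(17, 2, 3)
    y = np.random.rand(17, 3, 2)
    dout = np.ones((17, 2, 2))
    dx, dy = matmul_grads(x, y, dout, 2.0, False, False)
    assert dx.shape == x.shape and dy.shape == y.shape
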
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py
index 149002fc765..dba63be27b4 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_bf16_mkldnn_op.py
@@ -26,22 +26,23 @@ from paddle import enable_static
                      "place does not support BF16 evaluation")
 class TestMatmulBf16MklDNNOp(OpTest):
     def generate_data(self):
-        self.x = np.random.random((25, 2, 2)).astype(np.float32)
-        self.y = np.random.random((25, 2, 2)).astype(np.float32)
-        self.alpha = 1.0
-        self.out = self.alpha * np.matmul(self.x, self.y)
+        self.x_fp32 = np.random.random((25, 2, 2)).astype(np.float32)
+        self.y_fp32 = np.random.random((25, 2, 2)).astype(np.float32)
+        self.out = self.alpha * np.matmul(self.x_fp32, self.y_fp32)
 
     def set_attributes(self):
-        self.alpha = self.alpha if hasattr(self, 'alpha') else 1.0
         self.attrs = {
             'alpha': self.alpha,
             "use_mkldnn": self.use_mkldnn,
             "mkldnn_data_type": self.mkldnn_data_type,
-            "force_fp32_output": self.force_fp32_output
+            "force_fp32_output": self.force_fp32_output,
+            'transpose_X': False,
+            'transpose_Y': False
         }
 
     def setUp(self):
         self.op_type = "matmul"
+        self.alpha = 1.0
         self.use_mkldnn = True
         self.dtype = np.uint16
         self.mkldnn_data_type = "bfloat16"
@@ -53,67 +54,113 @@ class TestMatmulBf16MklDNNOp(OpTest):
         self.out = convert_float_to_uint16(self.out)
         self.outputs = {'Out': self.out}
 
-        self.x = convert_float_to_uint16(self.x)
-        self.y = convert_float_to_uint16(self.y)
-        self.inputs = {'X': self.x, 'Y': self.y}
+        self.x_bf16 = convert_float_to_uint16(self.x_fp32)
+        self.y_bf16 = convert_float_to_uint16(self.y_fp32)
+        self.inputs = {'X': self.x_bf16, 'Y': self.y_bf16}
 
     def test_check_output(self):
         self.check_output_with_place(core.CPUPlace())
 
     def test_check_grad(self):
-        pass
+        self.calculate_grads()
+        self.check_grad_with_place(
+            core.CPUPlace(), ["X", "Y"],
+            "Out",
+            check_dygraph=False,
+            user_defined_grads=[self.dx, self.dy],
+            user_defined_grad_outputs=[convert_float_to_uint16(self.dout)])
+
+    def matmul_grad(self, x, transpose_x, y, transpose_y):
+        x_transpose_axes = [1, 0] if x.ndim == 2 else [0, 2, 1]
+        y_transpose_axes = [1, 0] if y.ndim == 2 else [0, 2, 1]
+
+        x = np.transpose(x, x_transpose_axes) if transpose_x else x
+        y = np.transpose(y, y_transpose_axes) if transpose_y else y
+
+        return self.alpha * np.matmul(x, y)
+
+    def calculate_grads(self):
+        x_transpose_axes = [1, 0] if self.x_fp32.ndim == 2 else [0, 2, 1]
+        y_transpose_axes = [1, 0] if self.y_fp32.ndim == 2 else [0, 2, 1]
+
+        x = np.transpose(self.x_fp32, x_transpose_axes) if self.attrs[
+            'transpose_X'] is True else self.x_fp32
+        y = np.transpose(self.y_fp32, y_transpose_axes) if self.attrs[
+            'transpose_Y'] is True else self.y_fp32
+
+        dout = self.alpha * np.matmul(x, y)
+
+        if self.attrs['transpose_X'] is True and self.attrs[
+                'transpose_Y'] is True:
+            self.dx = self.matmul_grad(self.y_fp32, True, dout, True)
+            self.dy = self.matmul_grad(dout, True, self.x_fp32, True)
+        elif self.attrs['transpose_X'] is True and self.attrs[
+                'transpose_Y'] is False:
+            self.dx = self.matmul_grad(self.y_fp32, False, dout, True)
+            self.dy = self.matmul_grad(self.x_fp32, False, dout, False)
+        elif self.attrs['transpose_X'] is False and self.attrs[
+                'transpose_Y'] is True:
+            self.dx = self.matmul_grad(dout, False, self.y_fp32, False)
+            self.dy = self.matmul_grad(dout, True, self.x_fp32, False)
+        else:
+            self.dx = self.matmul_grad(dout, False, self.y_fp32, True)
+            self.dy = self.matmul_grad(self.x_fp32, True, dout, False)
+
+        self.dout = dout
 
 
 class TestDnnlMatMulOpAlpha(TestMatmulBf16MklDNNOp):
     def generate_data(self):
-        self.x = np.random.random((17, 2, 3)).astype(np.float32)
-        self.y = np.random.random((17, 3, 2)).astype(np.float32)
+        self.x_fp32 = np.random.random((17, 2, 3)).astype(np.float32)
+        self.y_fp32 = np.random.random((17, 3, 2)).astype(np.float32)
         self.alpha = 2.0
-        self.out = self.alpha * np.matmul(self.x, self.y)
+        self.out = self.alpha * np.matmul(self.x_fp32, self.y_fp32)
 
 
 class TestDnnlMatMulOp2D(TestMatmulBf16MklDNNOp):
     def generate_data(self):
-        self.x = np.random.random((12, 9)).astype(np.float32)
-        self.y = np.random.random((9, 12)).astype(np.float32)
-        self.out = np.matmul(self.x, self.y)
+        self.x_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.y_fp32 = np.random.random((9, 12)).astype(np.float32)
+        self.out = np.matmul(self.x_fp32, self.y_fp32)
 
 
 class TestDnnlMatMulOpTransposeX(TestMatmulBf16MklDNNOp):
     def generate_data(self):
-        self.x = np.random.random((12, 9)).astype(np.float32)
-        self.y = np.random.random((12, 9)).astype(np.float32)
-        self.out = np.matmul(np.transpose(self.x), self.y)
+        self.x_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.y_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.out = np.matmul(np.transpose(self.x_fp32), self.y_fp32)
 
     def set_attributes(self):
         self.attrs = {
             "use_mkldnn": self.use_mkldnn,
             "mkldnn_data_type": self.mkldnn_data_type,
-            'transpose_X': True
+            'transpose_X': True,
+            'transpose_Y': False
         }
 
 
 class TestDnnlMatMulOpTransposeY(TestMatmulBf16MklDNNOp):
     def generate_data(self):
-        self.x = np.random.random((12, 9)).astype(np.float32)
-        self.y = np.random.random((12, 9)).astype(np.float32)
-        self.out = np.matmul(self.x, np.transpose(self.y))
+        self.x_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.y_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.out = np.matmul(self.x_fp32, np.transpose(self.y_fp32))
 
     def set_attributes(self):
         self.attrs = {
             "use_mkldnn": self.use_mkldnn,
             "mkldnn_data_type": self.mkldnn_data_type,
-            'transpose_Y': True
+            'transpose_Y': True,
+            'transpose_X': False
         }
 
 
 class TestMatmulBf16MklDNNForceFp32Output(TestMatmulBf16MklDNNOp):
     def generate_data(self):
-        self.x = np.random.random((12, 9)).astype(np.float32)
-        self.y = np.random.random((9, 12)).astype(np.float32)
+        self.x_fp32 = np.random.random((12, 9)).astype(np.float32)
+        self.y_fp32 = np.random.random((9, 12)).astype(np.float32)
         self.force_fp32_output = True
         self.alpha = 0.5
-        self.out = self.alpha * np.matmul(self.x, self.y)
+        self.out = self.alpha * np.matmul(self.x_fp32, self.y_fp32)
 
 
 if __name__ == "__main__":
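The bfloat16 test above feeds the operator uint16 bit patterns of the float32 data. A minimal sketch of such a float32-to-bf16 conversion, assuming simple truncation (OpTest's convert_float_to_uint16 helper may round instead of truncate):

    import numpy as np

    def float_to_bf16_bits(x):
        # Keep the upper 16 bits of the IEEE-754 float32 representation.
        x = np.ascontiguousarray(x, dtype=np.float32)
        return (x.view(np.uint32) >> 16).astype(np.uint16)

    a = np.array([1.0, 0.5, 3.14159], dtype=np.float32)
    print(float_to_bf16_bits(a))  # [16256 16128 16457]
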
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py
index 2f557f0bf14..724b9d9818d 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py
@@ -19,7 +19,6 @@ import numpy as np
 from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
 
 
-@skip_check_grad_ci(reason="DNNL's MatMul doesn't implemend grad kernel.")
 class TestDnnlMatMulOp(OpTest):
     def generate_data(self):
         self.x = np.random.random((25, 2, 2)).astype("float32")
@@ -48,21 +47,99 @@ class TestDnnlMatMulOp(OpTest):
         self.check_output()
 
 
-class TestDnnlMatMulOpMixedDims1(TestDnnlMatMulOp):
+class TestDnnlMatMulWithGradOp(TestDnnlMatMulOp):
+    def test_check_grad(self):
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2)
+
+
+class TestDnnlMatMulOpMixedDims1(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((17, 2, 3)).astype("float32")
         self.y = np.random.random((3, 4)).astype("float32")
         self.out = np.matmul(self.x, self.y)
 
 
-class TestDnnlMatMulOpMixedDims2(TestDnnlMatMulOp):
+class TestDnnlMatMulOpMixedDimsYWiderTransposeY(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((8, 2, 3)).astype("float32")
+        self.y = np.random.random((4, 3)).astype("float32")
+        self.out = np.matmul(self.x, np.transpose(self.y))
+
+    def set_attributes(self):
+        self.attrs = {'transpose_Y': True}
+
+
+class TestDnnlMatMulOpMixedDimsYWiderTransposeX(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((8, 3, 2)).astype("float32")
+        self.y = np.random.random((3, 4)).astype("float32")
+        self.out = np.matmul(np.transpose(self.x, (0, 2, 1)), self.y)
+
+    def set_attributes(self):
+        self.attrs = {'transpose_X': True}
+
+
+class TestDnnlMatMulOpMixedDimsXWiderTransposeXY(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((8, 3, 2)).astype("float32")
+        self.y = np.random.random((4, 3)).astype("float32")
+        self.out = np.matmul(
+            np.transpose(self.x, (0, 2, 1)), np.transpose(self.y))
+
+    def set_attributes(self):
+        self.attrs = {'transpose_X': True, 'transpose_Y': True}
+
+
+class TestDnnlMatMulOpMixedDimsYWiderTransposeXY(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((3, 2)).astype("float32")
+        self.y = np.random.random((8, 4, 3)).astype("float32")
+        self.out = np.matmul(
+            np.transpose(self.x), np.transpose(self.y, (0, 2, 1)))
+
+    def set_attributes(self):
+        self.attrs = {'transpose_X': True, 'transpose_Y': True}
+
+
+class TestDnnlMatMulOpMixedDimsXWiderTransposeX(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((5, 4)).astype("float32")
+        self.y = np.random.random((8, 5, 4)).astype("float32")
+        self.out = np.matmul(np.transpose(self.x), self.y)
+
+    def set_attributes(self):
+        self.attrs = {'transpose_X': True}
+
+
+class TestDnnlMatMulOpVectorMultiply(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((5)).astype("float32")
+        self.y = np.random.random((5)).astype("float32")
+        self.out = np.matmul(self.x, self.y)
+
+
+class TestDnnlMatMulOpVectorMultiplyTranspose(TestDnnlMatMulWithGradOp):
+    def generate_data(self):
+        self.x = np.random.random((5)).astype("float32")
+        x_resized = np.copy(self.x)
+        x_resized = np.expand_dims(x_resized, 1)
+        self.y = np.random.random((6)).astype("float32")
+        y_resized = np.copy(self.y)
+        y_resized = np.expand_dims(y_resized, 0)
+        self.out = np.matmul(x_resized, y_resized)
+
+    def set_attributes(self):
+        self.attrs = {'transpose_Y': True, 'transpose_X': True}
+
+
+class TestDnnlMatMulOpMixedDims2(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((2, 3)).astype("float32")
         self.y = np.random.random((17, 3, 4)).astype("float32")
         self.out = np.matmul(self.x, self.y)
 
 
-class TestDnnlMatMulOpAlpha(TestDnnlMatMulOp):
+class TestDnnlMatMulOpAlpha(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((17, 2, 3)).astype("float32")
         self.y = np.random.random((17, 3, 2)).astype("float32")
@@ -70,18 +147,14 @@ class TestDnnlMatMulOpAlpha(TestDnnlMatMulOp):
         self.out = self.alpha * np.matmul(self.x, self.y)
 
 
-class TestDnnlMatMulOp2D(TestDnnlMatMulOp):
-    def print_tensor(self, name, tensor):
-        print(name)
-        print(tensor)
-
+class TestDnnlMatMulOp2D(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((12, 9)).astype("float32")
         self.y = np.random.random((9, 12)).astype("float32")
         self.out = np.matmul(self.x, self.y)
 
 
-class TestDnnlMatMulOpTransposeX(TestDnnlMatMulOp):
+class TestDnnlMatMulOpTransposeX(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((12, 9)).astype("float32")
         self.y = np.random.random((12, 9)).astype("float32")
@@ -91,7 +164,7 @@ class TestDnnlMatMulOpTransposeX(TestDnnlMatMulOp):
         self.attrs = {'transpose_X': True}
 
 
-class TestDnnlMatMulOpTransposeY(TestDnnlMatMulOp):
+class TestDnnlMatMulOpTransposeY(TestDnnlMatMulWithGradOp):
    def generate_data(self):
         self.x = np.random.random((12, 9)).astype("float32")
         self.y = np.random.random((12, 9)).astype("float32")
@@ -101,7 +174,7 @@ class TestDnnlMatMulOpTransposeY(TestDnnlMatMulOp):
         self.attrs = {'transpose_Y': True}
 
 
-class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulOp):
+class TestDnnlMatMulOpTransposeY3D(TestDnnlMatMulWithGradOp):
     def generate_data(self):
         self.x = np.random.random((17, 3, 2)).astype("float32")
         self.y = np.random.random((17, 3, 2)).astype("float32")
@@ -480,4 +553,6 @@ class TestMatMulOpTransposeReshapeRankOfReshapeNotSupportedException(
 
 
 if __name__ == "__main__":
+    from paddle import enable_static
+    enable_static()
     unittest.main()
--
GitLab
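
For completeness, the kind of check these grad tests rely on can be reproduced standalone by comparing the analytic dX against a central finite difference of alpha * matmul (a rough sketch, not OpTest's actual implementation):

    import numpy as np

    np.random.seed(0)
    alpha = 2.0
    x = np.random.rand(2, 3)
    y = np.random.rand(3, 4)
    dout = np.ones((2, 4))            # d(sum(Out))/dOut

    dx_analytic = alpha * dout @ y.T  # no-transpose case

    eps = 1e-6
    dx_numeric = np.zeros_like(x)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp, xm = x.copy(), x.copy()
            xp[i, j] += eps
            xm[i, j] -= eps
            dx_numeric[i, j] = (np.sum(alpha * xp @ y) -
                                np.sum(alpha * xm @ y)) / (2 * eps)

    print(np.max(np.abs(dx_analytic - dx_numeric)))  # small, around 1e-8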