未验证 提交 203a0e3e 编写于 作者: W Weilong Wu 提交者: GitHub

Support matmul_v2 triple grad Kernel (#36459)

* native commit for triple grad of sigmoid

* Updated unittests files

* init functional jacobian api

* Updated trible_test func

* Updated gradient_checker & test_script

* finish test with dtype float32

* add float64 test case

* polish code

* use atol=1e-5 with dtype float64

* fix for ci

* set timeout for test_jacobian

* fix dygraph grad to support high differential

* polish API docstring

* Updated gradient checker and some related files

* fix double grad strip error for high differential

* fix double grad strip error for high differential

* Add Sigmoid triple grad tests

* fix dygraph double grad dtype error when calling for high differential scenario

* Updated triple grad tests func

* Use np.random to initialize ddx

* Updated triple_grad_check func

* add todo for gradient checker and refine some comments

* remove additional code

* add test for warning in backward.py

* format python code

* support multi input in triple gradient checker

* Add matmul triple grad kernel

* Updated comments of TODO

* Supported some special tests

* Change code-format to follow CI std

* Updated gradient_checker.py

* Fix conflicts

* Removed unnecessary printing log

* Change code style to follow CI std
Co-authored-by: Nlevi131 <limaolin01@baidu.com>
Co-authored-by: NJiabin Yang <360788950@qq.com>
上级 b9fdd3bc
......@@ -347,6 +347,76 @@ class MatMulV2OpDoubleGradMaker : public framework::SingleGradOpMaker<T> {
op->SetAttrMap(this->Attrs());
}
};
// Shape-inference operator for the triple (third-order) gradient of
// matmul_v2. Validates that every tensor the triple-grad kernel consumes is
// present, then propagates shapes to the optional outputs via ShareDim.
class MatMulV2OpTripleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* context) const override {
    // Forward operands and the first-order output gradient.
    OP_INOUT_CHECK(context->HasInput("X"), "Input", "X",
                   "matmul_v2_triple_grad");
    OP_INOUT_CHECK(context->HasInput("Y"), "Input", "Y",
                   "matmul_v2_triple_grad");
    OP_INOUT_CHECK(context->HasInput("DOut"), "Input", "DOut",
                   "matmul_v2_triple_grad");
    // Second-order (double-grad) inputs.
    OP_INOUT_CHECK(context->HasInput("DDX"), "Input", "DDX",
                   "matmul_v2_triple_grad");
    OP_INOUT_CHECK(context->HasInput("DDY"), "Input", "DDY",
                   "matmul_v2_triple_grad");
    // Gradients flowing back from the double-grad op's outputs
    // (DX, DY, DDOut) — see MatMulV2OpTripleGradMaker below.
    OP_INOUT_CHECK(context->HasInput("D_DX"), "Input", "D_DX",
                   "matmul_v2_triple_grad");
    OP_INOUT_CHECK(context->HasInput("D_DY"), "Input", "D_DY",
                   "matmul_v2_triple_grad");
    OP_INOUT_CHECK(context->HasInput("D_DDOut"), "Input", "D_DDOut",
                   "matmul_v2_triple_grad");

    // Every output is optional; when requested, it takes the shape of the
    // tensor whose gradient it represents (D_DDX_out/D_DDY_out share dims
    // with X/Y, which have the same shapes as DDX/DDY).
    if (context->HasOutput("D_X_out")) {
      context->ShareDim("X", "D_X_out");
    }
    if (context->HasOutput("D_Y_out")) {
      context->ShareDim("Y", "D_Y_out");
    }
    if (context->HasOutput("D_DOut_out")) {
      context->ShareDim("DOut", "D_DOut_out");
    }
    if (context->HasOutput("D_DDX_out")) {
      context->ShareDim("X", "D_DDX_out");
    }
    if (context->HasOutput("D_DDY_out")) {
      context->ShareDim("Y", "D_DDY_out");
    }
  }
};
// Grad-op maker attached to matmul_v2_grad_grad: it constructs the
// matmul_v2_triple_grad op, feeding it the double-grad op's inputs plus the
// gradients of the double-grad op's outputs.
template <typename T>
class MatMulV2OpTripleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> op) const override {
    op->SetType("matmul_v2_triple_grad");

    // Inputs forwarded from the double-grad op.
    op->SetInput("X", this->Input("X"));
    op->SetInput("Y", this->Input("Y"));
    op->SetInput("DOut", this->Input("DOut"));
    op->SetInput("DDX", this->Input("DDX"));
    op->SetInput("DDY", this->Input("DDY"));
    // Gradients of the double-grad op's outputs (the third-order signal).
    op->SetInput("D_DX", this->OutputGrad("DX"));
    op->SetInput("D_DY", this->OutputGrad("DY"));
    op->SetInput("D_DDOut", this->OutputGrad("DDOut"));

    // Outputs: gradients w.r.t. each of the double-grad op's inputs.
    op->SetOutput("D_X_out", this->InputGrad("X"));
    op->SetOutput("D_Y_out", this->InputGrad("Y"));
    op->SetOutput("D_DOut_out", this->InputGrad("DOut"));
    op->SetOutput("D_DDX_out", this->InputGrad("DDX"));
    op->SetOutput("D_DDY_out", this->InputGrad("DDY"));

    op->SetAttrMap(this->Attrs());
  }
};
} // namespace operators
} // namespace paddle
......@@ -359,7 +429,11 @@ REGISTER_OPERATOR(matmul_v2_grad, ops::MatMulV2OpGrad,
ops::MatMulV2OpDoubleGradMaker<paddle::framework::OpDesc>,
ops::MatMulV2OpDoubleGradMaker<paddle::imperative::OpBase>);
// Register the double-grad op together with its triple-grad maker, and the
// triple-grad op itself (shape inference only; kernels registered below).
// NOTE: the flattened diff left a second, maker-less registration of
// matmul_v2_grad_grad here; an op name may be registered only once, so the
// superseded line is removed.
REGISTER_OPERATOR(matmul_v2_grad_grad, ops::MatMulV2OpDoubleGrad,
                  ops::MatMulV2OpTripleGradMaker<paddle::framework::OpDesc>,
                  ops::MatMulV2OpTripleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(matmul_v2_triple_grad, ops::MatMulV2OpTripleGrad);
REGISTER_OP_CPU_KERNEL(
matmul_v2, ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, float>,
......@@ -385,3 +459,12 @@ REGISTER_OP_CPU_KERNEL(
paddle::platform::complex<float>>,
ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
// CPU kernels for matmul_v2_triple_grad: float/double and their complex
// counterparts (float16 is registered only for the CUDA backend below).
REGISTER_OP_CPU_KERNEL(
    matmul_v2_triple_grad,
    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, double>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
                                  paddle::platform::complex<float>>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
                                  paddle::platform::complex<double>>);
......@@ -40,3 +40,13 @@ REGISTER_OP_CUDA_KERNEL(
paddle::platform::complex<float>>,
ops::MatMulV2DoubleGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::complex<double>>);
// CUDA kernels for matmul_v2_triple_grad. The float16 entry originally used
// the `plf` namespace alias while every sibling registration spells out
// paddle::platform; use the fully qualified names throughout for consistency.
REGISTER_OP_CUDA_KERNEL(
    matmul_v2_triple_grad,
    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext, double>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext,
                                  paddle::platform::float16>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext,
                                  paddle::platform::complex<float>>,
    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext,
                                  paddle::platform::complex<double>>);
......@@ -304,7 +304,6 @@ def grad_check(x,
if b.has_var(xi.name):
clone_x.append(b.var(xi.name))
break
analytical.append(
_compute_analytical_jacobian(prog, clone_x, clone_y, place, scope))
......@@ -486,7 +485,6 @@ def triple_grad_check(x,
var_to_np_array_in_scope(scope, place, v.name)
for v in x_grads_grads
]
x += y_grads
x_init = _as_list(x_init)
x_init += y_grads_init
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -146,5 +146,427 @@ class TestMatmulDoubleGradCheckCase3(unittest.TestCase):
self.func(p)
class TestMatmulTripleGradCheckDotCase(unittest.TestCase):
    """Triple-grad check for the 1-D dot-product form of matmul."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        # Both operands are vectors -> dot product.
        self.x_shape = [2]
        self.y_shape = [2]
        self.transpose_x = False
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckNormalCase1(unittest.TestCase):
    """Triple-grad check for square matmul, no transposes."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [2, 2]
        self.y_shape = [2, 2]
        self.transpose_x = False
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckNormalCase2(unittest.TestCase):
    """Triple-grad check for square matmul with transpose_x=True."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [2, 2]
        self.y_shape = [2, 2]
        self.transpose_x = True
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckNormalCase3(unittest.TestCase):
    """Triple-grad check for square matmul with transpose_y=True."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [2, 2]
        self.y_shape = [2, 2]
        self.transpose_x = False
        self.transpose_y = True

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckNormalCase4(unittest.TestCase):
    """Triple-grad check for square matmul with both transposes enabled."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [2, 2]
        self.y_shape = [2, 2]
        self.transpose_x = True
        self.transpose_y = True

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckBroadcastCase1(unittest.TestCase):
    """Triple-grad check where y's batch dim (1) broadcasts against x's (3)."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [3, 2, 2]
        self.y_shape = [1, 2, 2]
        self.transpose_x = False
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckBroadcastCase2(unittest.TestCase):
    """Triple-grad check where x's batch dim (1) broadcasts against y's (3)."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [1, 2, 2]
        self.y_shape = [3, 2, 2]
        self.transpose_x = False
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckBroadcastCase3(unittest.TestCase):
    """Triple-grad check with batch broadcasting and transpose_x=True."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [1, 2, 2]
        self.y_shape = [3, 2, 2]
        self.transpose_x = True
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckBroadcastCase4(unittest.TestCase):
    """Triple-grad check with batch broadcasting and transpose_y=True."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [1, 2, 2]
        self.y_shape = [3, 2, 2]
        self.transpose_x = False
        self.transpose_y = True

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckBroadcastCase5(unittest.TestCase):
    """Triple-grad check with batch broadcasting and both transposes."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [1, 2, 2]
        self.y_shape = [3, 2, 2]
        self.transpose_x = True
        self.transpose_y = True

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckSpecialCase1(unittest.TestCase):
    """Triple-grad check for a batched matrix times a 1-D vector."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [3, 4, 5]
        self.y_shape = [5]
        self.transpose_x = False
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
class TestMatmulTripleGradCheckSpecialCase2(unittest.TestCase):
    """Triple-grad check for a transposed batched matrix times a vector."""

    def setUp(self):
        self.init_test()

    def init_test(self):
        self.x_shape = [4, 5, 5]
        self.y_shape = [5]
        self.transpose_x = True
        self.transpose_y = False

    @prog_scope()
    def func(self, place):
        # Build the static-graph matmul and run the third-order checker.
        typename = "float64"
        x = paddle.static.create_parameter(
            dtype=typename, shape=self.x_shape, name='x')
        y = paddle.static.create_parameter(
            dtype=typename, shape=self.y_shape, name='y')
        out = paddle.matmul(
            x, y, self.transpose_x, self.transpose_y, name='out')
        np.random.seed(2021)
        x_arr = np.random.uniform(-1, 1, self.x_shape).astype(np.float64)
        y_arr = np.random.uniform(-1, 1, self.y_shape).astype(np.float64)
        gradient_checker.triple_grad_check(
            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=0.005)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            self.func(place)
# Run all test cases when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册