diff --git a/paddle/phi/kernels/cpu/matmul_kernel.cc b/paddle/phi/kernels/cpu/matmul_kernel.cc
index c75a50130db767e903decd4b60e979b77a6a7af7..af5d11839e0e2f3fc0ec4edc5d1d60dd9981a9d7 100644
--- a/paddle/phi/kernels/cpu/matmul_kernel.cc
+++ b/paddle/phi/kernels/cpu/matmul_kernel.cc
@@ -25,6 +25,8 @@ PD_REGISTER_KERNEL(matmul,
                    phi::MatmulKernel,
                    float,
                    double,
+                   int32_t,
+                   int64_t,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/gpu/matmul_kernel.cu b/paddle/phi/kernels/gpu/matmul_kernel.cu
index c5271a4eeece6fe808ca9d82238690a3f754e446..71095bf783b0bb545d0cf686488fafb85c657f9c 100644
--- a/paddle/phi/kernels/gpu/matmul_kernel.cu
+++ b/paddle/phi/kernels/gpu/matmul_kernel.cu
@@ -25,6 +25,8 @@ PD_REGISTER_KERNEL(matmul,
                    phi::MatmulKernel,
                    float,
                    double,
+                   int32_t,
+                   int64_t,
                    phi::dtype::float16,
                    phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
diff --git a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
index 885827a36beab164572b915f02a9f11ce7544906..899ee5f3a497bd26fe2f70effacaca012fafcc68 100644
--- a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -97,13 +97,14 @@ static DenseTensor FoldHeadAndLastDims(const Context& dev_ctx,
 }
 
 template <typename Context, typename T>
-void MatMul(const Context& dev_ctx,
-            const DenseTensor& a,
-            bool trans_a,
-            const DenseTensor& b,
-            bool trans_b,
-            DenseTensor* out,
-            bool flag = false) {
+typename std::enable_if<!std::is_integral<T>::value>::type MatMul(
+    const Context& dev_ctx,
+    const DenseTensor& a,
+    bool trans_a,
+    const DenseTensor& b,
+    bool trans_b,
+    DenseTensor* out,
+    bool flag = false) {
   dev_ctx.template Alloc<T>(out);
   auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
   auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(a.dims(), 0, trans_a);
diff --git a/paddle/phi/kernels/impl/matmul_kernel_impl.h b/paddle/phi/kernels/impl/matmul_kernel_impl.h
index a77fbd961312f9daa4c27d7c294f9967ddfaa587..e680e164e623d12a186c0f5ef2ed7daccc7eb95c 100644
--- a/paddle/phi/kernels/impl/matmul_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/kernels/autotune/cache_base.h"
+#include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h"
 #include "paddle/phi/kernels/funcs/complex_functors.h"
@@ -1078,6 +1079,38 @@ void MatMulInt8Function(const Context& ctx,
 #endif
 }
 
+template <typename Context, typename T>
+typename std::enable_if<std::is_integral<T>::value>::type
+MatmulJudgeDtypeKernel(const Context& ctx,
+                       const DenseTensor& x,
+                       const DenseTensor& y,
+                       const std::vector<std::int64_t>& x_dims,
+                       const std::vector<std::int64_t>& y_dims,
+                       DenseTensor* out,
+                       bool transpose_x,
+                       bool transpose_y) {
+  auto x_tmp = phi::Cast<T, Context>(ctx, x, phi::DataType::FLOAT32);
+  auto y_tmp = phi::Cast<T, Context>(ctx, y, phi::DataType::FLOAT32);
+  DenseTensor out_tmp;
+  MatMulFunction<Context, float>(
+      ctx, x_tmp, y_tmp, x_dims, y_dims, &out_tmp, transpose_x, transpose_y);
+  phi::CastKernel<float, Context>(ctx, out_tmp, x.dtype(), out);
+}
+
+template <typename Context, typename T>
+typename std::enable_if<!std::is_integral<T>::value>::type
+MatmulJudgeDtypeKernel(const Context& ctx,
+                       const DenseTensor& x,
+                       const DenseTensor& y,
+                       const std::vector<std::int64_t>& x_dims,
+                       const std::vector<std::int64_t>& y_dims,
+                       DenseTensor* out,
+                       bool transpose_x,
+                       bool transpose_y) {
+  MatMulFunction<Context, T>(
+      ctx, x, y, x_dims, y_dims, out, transpose_x, transpose_y);
+}
+
 template <typename T, typename Context>
 void MatmulKernel(const Context& ctx,
                   const DenseTensor& x,
@@ -1097,7 +1130,7 @@ void MatmulKernel(const Context& ctx,
                                    " but reviced dims size is 0. "));
   const std::vector<std::int64_t> x_dims = vectorize(x.dims());
   const std::vector<std::int64_t> y_dims = vectorize(y.dims());
-  MatMulFunction<Context, T>(
+  MatmulJudgeDtypeKernel<Context, T>(
       ctx, x, y, x_dims, y_dims, out, transpose_x, transpose_y);
 }
 
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 157349e8ff013aefa149e10b6c327cb61fcdbb65..2dcfcb5b08ff6d921e8be30e7e4b3d8601e937b4 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -2315,7 +2315,10 @@ def outer(x, y, name=None):
             var_names = {'x': x, 'y': y}
             for name, val in var_names.items():
                 check_variable_and_dtype(
-                    val, name, ['float16', 'float32', 'float64'], 'inner'
+                    val,
+                    name,
+                    ['float16', 'float32', 'float64', 'int32', 'int64'],
+                    'outer',
                 )
 
         __check_input(nx, ny)
diff --git a/test/legacy_test/test_matmul_v2_op.py b/test/legacy_test/test_matmul_v2_op.py
index 7869042f507e33a2a50a9ad1fd8a907959ef792a..fa939fd29d48e9de0feb644d5d89be751034158a 100644
--- a/test/legacy_test/test_matmul_v2_op.py
+++ b/test/legacy_test/test_matmul_v2_op.py
@@ -712,6 +712,110 @@ class TestMatMulTypePromotion(TestComplexMatMulOp):
         self.out = np.dot(self.x, self.y)
 
 
+class TestInt32MatmulOp(OpTest):
+    def setUp(self):
+        self.op_type = "matmul_v2"
+        self.python_api = paddle.tensor.matmul
+        self.init_base_dtype()
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y),
+        }
+        self.attrs = {'axis': -1, 'use_mkldnn': False}
+        self.outputs = {'Out': self.out}
+
+    def init_base_dtype(self):
+        self.dtype = np.int32
+
+    def init_input_output(self):
+        self.x = np.random.random((10, 10)).astype(self.dtype)
+        self.y = np.random.random((10, 10)).astype(self.dtype)
+        self.out = np.matmul(self.x, self.y)
+
+    def test_check_output(self):
+        self.check_output(check_cinn=False)
+
+
+class TestInt32MatMulOpBroadcast(OpTest):
+    def setUp(self):
+        self.op_type = "matmul_v2"
+        self.python_api = paddle.tensor.matmul
+        self.init_base_dtype()
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y),
+        }
+        self.attrs = {'axis': -1, 'use_mkldnn': False}
+        self.outputs = {'Out': self.out}
+
+    def init_base_dtype(self):
+        self.dtype = np.int32
+
+    def init_input_output(self):
+        self.x = np.random.random((10, 2, 5)).astype(self.dtype)
+        self.y = np.random.random((5, 20)).astype(self.dtype)
+        self.out = np.matmul(self.x, self.y)
+
+    def test_check_output(self):
+        self.check_output(check_cinn=False)
+
+
+class TestInt64MatmulOp(OpTest):
+    def setUp(self):
+        self.op_type = "matmul_v2"
+        self.python_api = paddle.tensor.matmul
+        self.init_base_dtype()
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y),
+        }
+        self.attrs = {'axis': -1, 'use_mkldnn': False}
+        self.outputs = {'Out': self.out}
+
+    def init_base_dtype(self):
+        self.dtype = np.int64
+
+    def init_input_output(self):
+        self.x = np.random.random((10, 10)).astype(self.dtype)
+        self.y = np.random.random((10, 10)).astype(self.dtype)
+        self.out = np.matmul(self.x, self.y)
+
+    def test_check_output(self):
+        self.check_output(check_cinn=False)
+
+
+class TestInt64MatMulOpBroadcast(OpTest):
+    def setUp(self):
+        self.op_type = "matmul_v2"
+        self.python_api = paddle.tensor.matmul
+        self.init_base_dtype()
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y),
+        }
+        self.attrs = {'axis': -1, 'use_mkldnn': False}
+        self.outputs = {'Out': self.out}
+
+    def init_base_dtype(self):
+        self.dtype = np.int64
+
+    def init_input_output(self):
+        self.x = np.random.random((10, 2, 5)).astype(self.dtype)
+        self.y = np.random.random((5, 20)).astype(self.dtype)
+        self.out = np.matmul(self.x, self.y)
+
+    def test_check_output(self):
+        self.check_output(check_cinn=False)
+
+
 class TestMatmulop(unittest.TestCase):
     def func_dygraph_matmul(self):
         paddle.disable_static()
diff --git a/test/legacy_test/test_outer.py b/test/legacy_test/test_outer.py
index 3bbe20b7b5b573a2b5907ffec2c01b5cebf0a9c0..5ce564509d46e319824bfe8c05ad1d2fd0c3fcbf 100644
--- a/test/legacy_test/test_outer.py
+++ b/test/legacy_test/test_outer.py
@@ -74,6 +74,18 @@ class TestMultiplyApi(unittest.TestCase):
         res = self._run_static_graph_case(x_data, y_data)
         np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
 
+        # test static computation graph: 1-d int32 array
+        x_data = np.random.rand(50).astype(np.int32)
+        y_data = np.random.rand(50).astype(np.int32)
+        res = self._run_static_graph_case(x_data, y_data)
+        np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
+
+        # test static computation graph: 1-d int64 array
+        x_data = np.random.rand(50).astype(np.int64)
+        y_data = np.random.rand(50).astype(np.int64)
+        res = self._run_static_graph_case(x_data, y_data)
+        np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
+
         # test dynamic computation graph: 3-d array
         x_data = np.random.rand(5, 10, 10).astype(np.float64)
         y_data = np.random.rand(2, 10).astype(np.float64)
@@ -112,6 +124,18 @@ class TestMultiplyApi(unittest.TestCase):
         res = self._run_dynamic_graph_case(x_data, y_data)
         np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
 
+        # test dynamic computation graph: 3-d int32 array
+        x_data = np.random.rand(5, 10, 10).astype(np.int32)
+        y_data = np.random.rand(2, 10).astype(np.int32)
+        res = self._run_dynamic_graph_case(x_data, y_data)
+        np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
+
+        # test dynamic computation graph: 3-d int64 array
+        x_data = np.random.rand(5, 10, 10).astype(np.int64)
+        y_data = np.random.rand(2, 10).astype(np.int64)
+        res = self._run_dynamic_graph_case(x_data, y_data)
+        np.testing.assert_allclose(res, np.outer(x_data, y_data), rtol=1e-05)
+
 
 class TestMultiplyError(unittest.TestCase):
     def test_errors(self):
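
As a quick sanity check of what the patch enables, here is a minimal dygraph usage sketch. It is illustrative only and not part of the diff; the tensor shapes and values are made up, and it assumes a Paddle build that includes these kernels.

# Usage sketch (not part of the patch): exercises the int32/int64 matmul and
# outer support added above. Shapes and values are illustrative only.
import numpy as np

import paddle

x = paddle.to_tensor(np.arange(6).reshape(2, 3).astype(np.int32))
y = paddle.to_tensor(np.arange(12).reshape(3, 4).astype(np.int32))
out = paddle.matmul(x, y)  # not supported for integer dtypes before this patch
print(out.dtype)  # paddle.int32 -- the kernel computes in float32 and casts back

a = paddle.to_tensor(np.arange(4, dtype=np.int64))
b = paddle.to_tensor(np.arange(5, dtype=np.int64))
print(paddle.outer(a, b).shape)  # [4, 5]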