diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc index 73fdb5c11b56c575c6582eef7bd0c0d90410bfab..e4d0a6c00a3fba41b63955dbeed90047e290f723 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc @@ -18,6 +18,7 @@ #include <memory> #include "backend/session/anf_runtime_algorithm.h" #include "backend/optimizer/common/helper.h" +#include "backend/kernel_compiler/common_utils.h" namespace mindspore { namespace kernel { @@ -75,15 +76,7 @@ void SetAkgAttrsForCast(const AnfNodePtr &anf_node) { std::string dst_type; TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); - if (output_type == kFloat32->type_id()) { - dst_type = "float32"; - } else if (output_type == kFloat16->type_id()) { - dst_type = "float16"; - } else if (output_type == kInt32->type_id()) { - dst_type = "int32"; - } else { - MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString(); - } + dst_type = TypeId2String(output_type); AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node); } diff --git a/mindspore/ops/_op_impl/akg/gpu/cast.py b/mindspore/ops/_op_impl/akg/gpu/cast.py index c8aef249cd9420b18c534b8ee4b0b2e9f9057981..3c9ffa897408b2557c7e7b8842ec03314896a6f8 100644 --- a/mindspore/ops/_op_impl/akg/gpu/cast.py +++ b/mindspore/ops/_op_impl/akg/gpu/cast.py @@ -21,10 +21,39 @@ cast_op_info = AkgGpuRegOp("Cast") \ .output(0, "output") \ .attr("dst_type", "required", "str") \ .dtype_format(DataType.F16_Default, DataType.F32_Default) \ - .dtype_format(DataType.F32_Default, DataType.F16_Default) \ - .dtype_format(DataType.F32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.I32_Default) \ + .dtype_format(DataType.F16_Default, DataType.F64_Default) \ + .dtype_format(DataType.I32_Default, DataType.F16_Default) \ 
.dtype_format(DataType.I32_Default, DataType.F32_Default) \ + .dtype_format(DataType.I32_Default, DataType.I8_Default) \ + .dtype_format(DataType.I32_Default, DataType.U8_Default) \ + .dtype_format(DataType.I32_Default, DataType.BOOL_Default) \ + .dtype_format(DataType.I8_Default, DataType.F64_Default) \ + .dtype_format(DataType.I8_Default, DataType.F32_Default) \ + .dtype_format(DataType.I8_Default, DataType.F16_Default) \ + .dtype_format(DataType.I8_Default, DataType.I32_Default) \ + .dtype_format(DataType.I8_Default, DataType.I16_Default) \ + .dtype_format(DataType.I8_Default, DataType.I64_Default) \ .dtype_format(DataType.BOOL_Default, DataType.F32_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.F16_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.F64_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.I8_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.I16_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.I32_Default) \ + .dtype_format(DataType.BOOL_Default, DataType.I64_Default) \ + .dtype_format(DataType.U8_Default, DataType.F32_Default) \ + .dtype_format(DataType.U8_Default, DataType.F16_Default) \ + .dtype_format(DataType.U8_Default, DataType.I32_Default) \ + .dtype_format(DataType.I16_Default, DataType.F64_Default) \ + .dtype_format(DataType.I16_Default, DataType.F32_Default) \ + .dtype_format(DataType.I16_Default, DataType.F16_Default) \ + .dtype_format(DataType.I16_Default, DataType.I32_Default) \ + .dtype_format(DataType.I16_Default, DataType.I64_Default) \ + .dtype_format(DataType.I64_Default, DataType.F64_Default) \ + .dtype_format(DataType.I64_Default, DataType.F32_Default) \ + .dtype_format(DataType.I64_Default, DataType.F16_Default) \ + .dtype_format(DataType.F32_Default, DataType.I32_Default) \ + .dtype_format(DataType.F32_Default, DataType.F16_Default) \ .get_op_info() diff --git a/tests/st/ops/gpu/test_cast_op.py b/tests/st/ops/gpu/test_cast_op.py index 
793d92d7bc439df1df02d422d65b35ed9fcd1ed4..b3b48fcfa0f5040c033f703924c8e3e037265ee8 100644 --- a/tests/st/ops/gpu/test_cast_op.py +++ b/tests/st/ops/gpu/test_cast_op.py @@ -70,3 +70,275 @@ def test_cast1(): assert type0 == 'float32' type1 = output[1].asnumpy().dtype assert type1 == 'float32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast2(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16)) + t0 = mstype.int32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16)) + t1 = mstype.float64 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int32' + type1 = output[1].asnumpy().dtype + assert type1 == 'float64' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast3(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float16)) + t0 = mstype.int32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.float32)) + t1 = mstype.int32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int32' + type1 = output[1].asnumpy().dtype + assert type1 == 'int32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast4(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int32)) + t0 = mstype.float16 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int32)) + t1 = mstype.int8 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float16' + type1 = output[1].asnumpy().dtype + assert type1 == 'int8' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast5(): + x0 = Tensor(np.arange(24).reshape((4, 
3, 2)).astype(np.int32)) + t0 = mstype.uint8 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int32)) + t1 = mstype.bool_ + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'uint8' + type1 = output[1].asnumpy().dtype + assert type1 == 'bool' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast6(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t0 = mstype.float64 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t1 = mstype.float32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float64' + type1 = output[1].asnumpy().dtype + assert type1 == 'float32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast7(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t0 = mstype.float32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t1 = mstype.float16 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float32' + type1 = output[1].asnumpy().dtype + assert type1 == 'float16' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast8(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t0 = mstype.int32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t1 = mstype.int16 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int32' + type1 = output[1].asnumpy().dtype + assert type1 == 'int16' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training 
+@pytest.mark.env_onecard +def test_cast9(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int8)) + t0 = mstype.int64 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t1 = mstype.float16 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int64' + type1 = output[1].asnumpy().dtype + assert type1 == 'float16' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast10(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t0 = mstype.int8 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t1 = mstype.float64 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int8' + type1 = output[1].asnumpy().dtype + assert type1 == 'float64' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast11(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t0 = mstype.int16 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t1 = mstype.int32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int16' + type1 = output[1].asnumpy().dtype + assert type1 == 'int32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast12(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.bool)) + t0 = mstype.int64 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.uint8)) + t1 = mstype.float32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int64' + type1 = output[1].asnumpy().dtype + assert type1 == 
'float32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast13(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.uint8)) + t0 = mstype.int32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.uint8)) + t1 = mstype.float16 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'int32' + type1 = output[1].asnumpy().dtype + assert type1 == 'float16' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast14(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t0 = mstype.float64 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t1 = mstype.float32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float64' + type1 = output[1].asnumpy().dtype + assert type1 == 'float32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast15(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t0 = mstype.float16 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t1 = mstype.int32 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float16' + type1 = output[1].asnumpy().dtype + assert type1 == 'int32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast16(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t0 = mstype.float16 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int64)) + t1 = mstype.float64 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = 
output[0].asnumpy().dtype + assert type0 == 'float16' + type1 = output[1].asnumpy().dtype + assert type1 == 'float64' + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_cast17(): + x0 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t0 = mstype.float32 + x1 = Tensor(np.arange(24).reshape((4, 3, 2)).astype(np.int16)) + t1 = mstype.float16 + + context.set_context(mode=context.GRAPH_MODE, device_target='GPU') + net = Net(t0, t1) + output = net(x0, x1) + type0 = output[0].asnumpy().dtype + assert type0 == 'float32' + type1 = output[1].asnumpy().dtype + assert type1 == 'float16'