提交 df1d9f90，作者：TensorFlower Gardener

Merge pull request #57979 from ROCmSoftwarePlatform:fixed_gpu_kernel_tiling_test_2

PiperOrigin-RevId: 480958124
......@@ -68,8 +68,17 @@ std::string GpuCodegenTest::MakePlatformSpecificLlvm(absl::string_view input) {
is_built_with_rocm_ ? "@llvm.amdgcn.s.barrier" : "@llvm.nvvm.barrier0"},
{"SHUFFLE", is_built_with_rocm_ ? "i32 @llvm.amdgcn.ds.bpermute"
: "float @llvm.nvvm.shfl.sync.down.f32"},
{"TIDX", is_built_with_rocm_ ? "llvm.amdgcn.workitem.id.x"
: "@llvm.nvvm.read.ptx.sreg.tid.x"}});
{"TIDX", is_built_with_rocm_ ? "@llvm.amdgcn.workitem.id.x"
: "@llvm.nvvm.read.ptx.sreg.tid.x"},
{"LCAL", is_built_with_rocm_ ? "%[[LOGICAL_T1:.*]] = call { i1, i64 } "
"@llvm.amdgcn.if.i64(i1 %[[LOGICAL_T0]])"
: "0"},
{"EXTV",
is_built_with_rocm_
? "%[[LOGICAL_T2:.*]] = extractvalue { i1, i64 } %[[LOGICAL_T1]], 0"
: "0"},
{"BR_CAL", is_built_with_rocm_ ? "br i1 %[[LOGICAL_T2]],"
: "br i1 %[[LOGICAL_T0]]"}});
}
} // namespace gpu
......
......@@ -535,7 +535,9 @@ TEST_F(GpuKernelTilingTest, RowReductionTwoRowsPerWarp) {
; CHECK: %[[TID_LOGICAL:.*]] = and i32 %[[TID_X]], 15
; CHECK: call SHUFFLE
; CHECK: %[[LOGICAL_T0:.*]] = icmp eq i32 %[[TID_LOGICAL]], 0
; CHECK: br i1 %[[LOGICAL_T0]],
; CHECK: LCAL
; CHECK: EXTV
; CHECK: BR_CAL
)";
CompileAndVerifyIr(std::move(hlo_module),
MakePlatformSpecificLlvm(expected_ir),
......@@ -572,8 +574,11 @@ TEST_F(GpuKernelTilingTest, RowReductionFourRowsPerWarp) {
; CHECK: %[[TID_LOGICAL:.*]] = and i32 %[[TID_X]], 7
; CHECK: call SHUFFLE
; CHECK: %[[LOGICAL_T0:.*]] = icmp eq i32 %[[TID_LOGICAL]], 0
; CHECK: br i1 %[[LOGICAL_T0]],
; CHECK: LCAL
; CHECK: EXTV
; CHECK: BR_CAL
)";
CompileAndVerifyIr(std::move(hlo_module),
MakePlatformSpecificLlvm(expected_ir),
/*match_optimized_ir=*/true);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册