提交 d4f75f25 编写于 作者: R Richard Henderson 提交者: Peter Maydell

target/arm: Rewrite vector gather loads

This fixes the endianness problem for softmmu, and moves
the main loop out of a macro and into an inlined function.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20181005175350.30752-13-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
上级 28d57f2d
...@@ -1292,69 +1292,111 @@ DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) ...@@ -1292,69 +1292,111 @@ DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhsu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldssu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldss_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbss_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhss_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbsu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhsu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldssu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldss_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbss_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhss_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhss_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhss_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbdu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhdu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldddu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbds_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbdu_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhdu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldddu_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbds_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhds_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_le_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_zss, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zss, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbdu_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhdu_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldddu_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_lddd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldbds_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldhds_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldsds_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32) void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
......
...@@ -4878,82 +4878,173 @@ DO_STN_2(4, dd, 8, 8) ...@@ -4878,82 +4878,173 @@ DO_STN_2(4, dd, 8, 8)
#undef DO_STN_1 #undef DO_STN_1
#undef DO_STN_2 #undef DO_STN_2
/* Loads with a vector index. */ /*
* Loads with a vector index.
*/
#define DO_LD1_ZPZ_S(NAME, TYPEI, TYPEM, FN) \ /*
void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
target_ulong base, uint32_t desc) \ */
{ \ typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
intptr_t i, oprsz = simd_oprsz(desc); \
unsigned scale = simd_data(desc); \ static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
uintptr_t ra = GETPC(); \ {
for (i = 0; i < oprsz; ) { \ return *(uint32_t *)(reg + H1_4(reg_ofs));
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
do { \
TYPEM m = 0; \
if (pg & 1) { \
target_ulong off = *(TYPEI *)(vm + H1_4(i)); \
m = FN(env, base + (off << scale), ra); \
} \
*(uint32_t *)(vd + H1_4(i)) = m; \
i += 4, pg >>= 4; \
} while (i & 15); \
} \
} }
#define DO_LD1_ZPZ_D(NAME, TYPEI, TYPEM, FN) \ static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ {
target_ulong base, uint32_t desc) \ return *(int32_t *)(reg + H1_4(reg_ofs));
{ \ }
intptr_t i, oprsz = simd_oprsz(desc) / 8; \
unsigned scale = simd_data(desc); \ static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
uintptr_t ra = GETPC(); \ {
uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \ return (uint32_t)*(uint64_t *)(reg + reg_ofs);
for (i = 0; i < oprsz; i++) { \ }
TYPEM mm = 0; \
if (pg[H1(i)] & 1) { \ static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
target_ulong off = (TYPEI)m[i]; \ {
mm = FN(env, base + (off << scale), ra); \ return (int32_t)*(uint64_t *)(reg + reg_ofs);
} \
d[i] = mm; \
} \
} }
DO_LD1_ZPZ_S(sve_ldbsu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
DO_LD1_ZPZ_S(sve_ldhsu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra) {
DO_LD1_ZPZ_S(sve_ldssu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) return *(uint64_t *)(reg + reg_ofs);
DO_LD1_ZPZ_S(sve_ldbss_zsu, uint32_t, int8_t, cpu_ldub_data_ra) }
DO_LD1_ZPZ_S(sve_ldhss_zsu, uint32_t, int16_t, cpu_lduw_data_ra)
static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
DO_LD1_ZPZ_S(sve_ldbsu_zss, int32_t, uint8_t, cpu_ldub_data_ra) target_ulong base, uint32_t desc, uintptr_t ra,
DO_LD1_ZPZ_S(sve_ldhsu_zss, int32_t, uint16_t, cpu_lduw_data_ra) zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
DO_LD1_ZPZ_S(sve_ldssu_zss, int32_t, uint32_t, cpu_ldl_data_ra) {
DO_LD1_ZPZ_S(sve_ldbss_zss, int32_t, int8_t, cpu_ldub_data_ra) const int mmu_idx = cpu_mmu_index(env, false);
DO_LD1_ZPZ_S(sve_ldhss_zss, int32_t, int16_t, cpu_lduw_data_ra) intptr_t i, oprsz = simd_oprsz(desc);
unsigned scale = simd_data(desc);
DO_LD1_ZPZ_D(sve_ldbdu_zsu, uint32_t, uint8_t, cpu_ldub_data_ra) ARMVectorReg scratch = { };
DO_LD1_ZPZ_D(sve_ldhdu_zsu, uint32_t, uint16_t, cpu_lduw_data_ra)
DO_LD1_ZPZ_D(sve_ldsdu_zsu, uint32_t, uint32_t, cpu_ldl_data_ra) set_helper_retaddr(ra);
DO_LD1_ZPZ_D(sve_ldddu_zsu, uint32_t, uint64_t, cpu_ldq_data_ra) for (i = 0; i < oprsz; ) {
DO_LD1_ZPZ_D(sve_ldbds_zsu, uint32_t, int8_t, cpu_ldub_data_ra) uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
DO_LD1_ZPZ_D(sve_ldhds_zsu, uint32_t, int16_t, cpu_lduw_data_ra) do {
DO_LD1_ZPZ_D(sve_ldsds_zsu, uint32_t, int32_t, cpu_ldl_data_ra) if (likely(pg & 1)) {
target_ulong off = off_fn(vm, i);
DO_LD1_ZPZ_D(sve_ldbdu_zss, int32_t, uint8_t, cpu_ldub_data_ra) tlb_fn(env, &scratch, i, base + (off << scale), mmu_idx, ra);
DO_LD1_ZPZ_D(sve_ldhdu_zss, int32_t, uint16_t, cpu_lduw_data_ra) }
DO_LD1_ZPZ_D(sve_ldsdu_zss, int32_t, uint32_t, cpu_ldl_data_ra) i += 4, pg >>= 4;
DO_LD1_ZPZ_D(sve_ldddu_zss, int32_t, uint64_t, cpu_ldq_data_ra) } while (i & 15);
DO_LD1_ZPZ_D(sve_ldbds_zss, int32_t, int8_t, cpu_ldub_data_ra) }
DO_LD1_ZPZ_D(sve_ldhds_zss, int32_t, int16_t, cpu_lduw_data_ra) set_helper_retaddr(0);
DO_LD1_ZPZ_D(sve_ldsds_zss, int32_t, int32_t, cpu_ldl_data_ra)
/* Wait until all exceptions have been raised to write back. */
DO_LD1_ZPZ_D(sve_ldbdu_zd, uint64_t, uint8_t, cpu_ldub_data_ra) memcpy(vd, &scratch, oprsz);
DO_LD1_ZPZ_D(sve_ldhdu_zd, uint64_t, uint16_t, cpu_lduw_data_ra) }
DO_LD1_ZPZ_D(sve_ldsdu_zd, uint64_t, uint32_t, cpu_ldl_data_ra)
DO_LD1_ZPZ_D(sve_ldddu_zd, uint64_t, uint64_t, cpu_ldq_data_ra) static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
DO_LD1_ZPZ_D(sve_ldbds_zd, uint64_t, int8_t, cpu_ldub_data_ra) target_ulong base, uint32_t desc, uintptr_t ra,
DO_LD1_ZPZ_D(sve_ldhds_zd, uint64_t, int16_t, cpu_lduw_data_ra) zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
DO_LD1_ZPZ_D(sve_ldsds_zd, uint64_t, int32_t, cpu_ldl_data_ra) {
const int mmu_idx = cpu_mmu_index(env, false);
intptr_t i, oprsz = simd_oprsz(desc) / 8;
unsigned scale = simd_data(desc);
ARMVectorReg scratch = { };
set_helper_retaddr(ra);
for (i = 0; i < oprsz; i++) {
uint8_t pg = *(uint8_t *)(vg + H1(i));
if (likely(pg & 1)) {
target_ulong off = off_fn(vm, i * 8);
tlb_fn(env, &scratch, i * 8, base + (off << scale), mmu_idx, ra);
}
}
set_helper_retaddr(0);
/* Wait until all exceptions have been raised to write back. */
memcpy(vd, &scratch, oprsz * 8);
}
/*
 * Generate the helper function sve_ld<MEM>_<OFS> as a thin wrapper
 * around sve_ld1_zs().
 *
 * MEM is pasted into the helper name and into the name of the load
 * callback, sve_ld1<MEM>_tlb.  OFS is pasted into the helper name and
 * into the name of the offset extractor, off_<OFS>_s.
 *
 * GETPC() is evaluated in the generated helper itself and handed to
 * sve_ld1_zs() as its return-address argument.  The flatten attribute
 * requests that the calls made by the wrapper be inlined into it.
 */
#define DO_LD1_ZPZ_S(MEM, OFS) \
void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
(CPUARMState *env, void *vd, void *vg, void *vm, \
target_ulong base, uint32_t desc) \
{ \
sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
off_##OFS##_s, sve_ld1##MEM##_tlb); \
}
/*
 * Generate the helper function sve_ld<MEM>_<OFS> as a thin wrapper
 * around sve_ld1_zd().
 *
 * MEM is pasted into the helper name and into the name of the load
 * callback, sve_ld1<MEM>_tlb.  OFS is pasted into the helper name and
 * into the name of the offset extractor, off_<OFS>_d.
 *
 * GETPC() is evaluated in the generated helper itself and handed to
 * sve_ld1_zd() as its return-address argument.  The flatten attribute
 * requests that the calls made by the wrapper be inlined into it.
 */
#define DO_LD1_ZPZ_D(MEM, OFS) \
void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
(CPUARMState *env, void *vd, void *vg, void *vm, \
target_ulong base, uint32_t desc) \
{ \
sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
off_##OFS##_d, sve_ld1##MEM##_tlb); \
}
/*
 * Instantiate the helpers.  Each line expands to one function named
 * sve_ld<MEM>_<OFS>: the _S form forwards to sve_ld1_zs() and the _D
 * form forwards to sve_ld1_zd().  The resulting names are the ones
 * declared with DEF_HELPER_FLAGS_6 and referenced (with a gen_helper_
 * prefix) from the gather_load_fn32/gather_load_fn64 tables, so this
 * list has to stay in step with both.
 */
DO_LD1_ZPZ_S(bsu, zsu)
DO_LD1_ZPZ_S(bsu, zss)
DO_LD1_ZPZ_D(bdu, zsu)
DO_LD1_ZPZ_D(bdu, zss)
DO_LD1_ZPZ_D(bdu, zd)
DO_LD1_ZPZ_S(bss, zsu)
DO_LD1_ZPZ_S(bss, zss)
DO_LD1_ZPZ_D(bds, zsu)
DO_LD1_ZPZ_D(bds, zss)
DO_LD1_ZPZ_D(bds, zd)
DO_LD1_ZPZ_S(hsu_le, zsu)
DO_LD1_ZPZ_S(hsu_le, zss)
DO_LD1_ZPZ_D(hdu_le, zsu)
DO_LD1_ZPZ_D(hdu_le, zss)
DO_LD1_ZPZ_D(hdu_le, zd)
DO_LD1_ZPZ_S(hsu_be, zsu)
DO_LD1_ZPZ_S(hsu_be, zss)
DO_LD1_ZPZ_D(hdu_be, zsu)
DO_LD1_ZPZ_D(hdu_be, zss)
DO_LD1_ZPZ_D(hdu_be, zd)
DO_LD1_ZPZ_S(hss_le, zsu)
DO_LD1_ZPZ_S(hss_le, zss)
DO_LD1_ZPZ_D(hds_le, zsu)
DO_LD1_ZPZ_D(hds_le, zss)
DO_LD1_ZPZ_D(hds_le, zd)
DO_LD1_ZPZ_S(hss_be, zsu)
DO_LD1_ZPZ_S(hss_be, zss)
DO_LD1_ZPZ_D(hds_be, zsu)
DO_LD1_ZPZ_D(hds_be, zss)
DO_LD1_ZPZ_D(hds_be, zd)
DO_LD1_ZPZ_S(ss_le, zsu)
DO_LD1_ZPZ_S(ss_le, zss)
DO_LD1_ZPZ_D(sdu_le, zsu)
DO_LD1_ZPZ_D(sdu_le, zss)
DO_LD1_ZPZ_D(sdu_le, zd)
DO_LD1_ZPZ_S(ss_be, zsu)
DO_LD1_ZPZ_S(ss_be, zss)
DO_LD1_ZPZ_D(sdu_be, zsu)
DO_LD1_ZPZ_D(sdu_be, zss)
DO_LD1_ZPZ_D(sdu_be, zd)
DO_LD1_ZPZ_D(sds_le, zsu)
DO_LD1_ZPZ_D(sds_le, zss)
DO_LD1_ZPZ_D(sds_le, zd)
DO_LD1_ZPZ_D(sds_be, zsu)
DO_LD1_ZPZ_D(sds_be, zss)
DO_LD1_ZPZ_D(sds_be, zd)
DO_LD1_ZPZ_D(dd_le, zsu)
DO_LD1_ZPZ_D(dd_le, zss)
DO_LD1_ZPZ_D(dd_le, zd)
DO_LD1_ZPZ_D(dd_be, zsu)
DO_LD1_ZPZ_D(dd_be, zss)
DO_LD1_ZPZ_D(dd_be, zd)
/* The generator macros are not used past this point. */
#undef DO_LD1_ZPZ_S
#undef DO_LD1_ZPZ_D
/* First fault loads with a vector index. */ /* First fault loads with a vector index. */
......
...@@ -5077,91 +5077,176 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale, ...@@ -5077,91 +5077,176 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
tcg_temp_free_i32(desc); tcg_temp_free_i32(desc);
} }
/* Indexed by [ff][xs][u][msz]. */ /* Indexed by [be][ff][xs][u][msz]. */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = { static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
{ { { gen_helper_sve_ldbss_zsu, /* Little-endian */
gen_helper_sve_ldhss_zsu, { { { { gen_helper_sve_ldbss_zsu,
NULL, }, gen_helper_sve_ldhss_le_zsu,
{ gen_helper_sve_ldbsu_zsu, NULL, },
gen_helper_sve_ldhsu_zsu, { gen_helper_sve_ldbsu_zsu,
gen_helper_sve_ldssu_zsu, } }, gen_helper_sve_ldhsu_le_zsu,
{ { gen_helper_sve_ldbss_zss, gen_helper_sve_ldss_le_zsu, } },
gen_helper_sve_ldhss_zss, { { gen_helper_sve_ldbss_zss,
NULL, }, gen_helper_sve_ldhss_le_zss,
{ gen_helper_sve_ldbsu_zss, NULL, },
gen_helper_sve_ldhsu_zss, { gen_helper_sve_ldbsu_zss,
gen_helper_sve_ldssu_zss, } } }, gen_helper_sve_ldhsu_le_zss,
gen_helper_sve_ldss_le_zss, } } },
{ { { gen_helper_sve_ldffbss_zsu,
gen_helper_sve_ldffhss_zsu, /* First-fault */
NULL, }, { { { gen_helper_sve_ldffbss_zsu,
{ gen_helper_sve_ldffbsu_zsu, gen_helper_sve_ldffhss_zsu,
gen_helper_sve_ldffhsu_zsu, NULL, },
gen_helper_sve_ldffssu_zsu, } }, { gen_helper_sve_ldffbsu_zsu,
{ { gen_helper_sve_ldffbss_zss, gen_helper_sve_ldffhsu_zsu,
gen_helper_sve_ldffhss_zss, gen_helper_sve_ldffssu_zsu, } },
NULL, }, { { gen_helper_sve_ldffbss_zss,
{ gen_helper_sve_ldffbsu_zss, gen_helper_sve_ldffhss_zss,
gen_helper_sve_ldffhsu_zss, NULL, },
gen_helper_sve_ldffssu_zss, } } } { gen_helper_sve_ldffbsu_zss,
gen_helper_sve_ldffhsu_zss,
gen_helper_sve_ldffssu_zss, } } } },
/* Big-endian */
{ { { { gen_helper_sve_ldbss_zsu,
gen_helper_sve_ldhss_be_zsu,
NULL, },
{ gen_helper_sve_ldbsu_zsu,
gen_helper_sve_ldhsu_be_zsu,
gen_helper_sve_ldss_be_zsu, } },
{ { gen_helper_sve_ldbss_zss,
gen_helper_sve_ldhss_be_zss,
NULL, },
{ gen_helper_sve_ldbsu_zss,
gen_helper_sve_ldhsu_be_zss,
gen_helper_sve_ldss_be_zss, } } },
/* First-fault */
{ { { gen_helper_sve_ldffbss_zsu,
gen_helper_sve_ldffhss_zsu,
NULL, },
{ gen_helper_sve_ldffbsu_zsu,
gen_helper_sve_ldffhsu_zsu,
gen_helper_sve_ldffssu_zsu, } },
{ { gen_helper_sve_ldffbss_zss,
gen_helper_sve_ldffhss_zss,
NULL, },
{ gen_helper_sve_ldffbsu_zss,
gen_helper_sve_ldffhsu_zss,
gen_helper_sve_ldffssu_zss, } } } },
}; };
/* Note that we overload xs=2 to indicate 64-bit offset. */ /* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = { static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
{ { { gen_helper_sve_ldbds_zsu, /* Little-endian */
gen_helper_sve_ldhds_zsu, { { { { gen_helper_sve_ldbds_zsu,
gen_helper_sve_ldsds_zsu, gen_helper_sve_ldhds_le_zsu,
NULL, }, gen_helper_sve_ldsds_le_zsu,
{ gen_helper_sve_ldbdu_zsu, NULL, },
gen_helper_sve_ldhdu_zsu, { gen_helper_sve_ldbdu_zsu,
gen_helper_sve_ldsdu_zsu, gen_helper_sve_ldhdu_le_zsu,
gen_helper_sve_ldddu_zsu, } }, gen_helper_sve_ldsdu_le_zsu,
{ { gen_helper_sve_ldbds_zss, gen_helper_sve_lddd_le_zsu, } },
gen_helper_sve_ldhds_zss, { { gen_helper_sve_ldbds_zss,
gen_helper_sve_ldsds_zss, gen_helper_sve_ldhds_le_zss,
NULL, }, gen_helper_sve_ldsds_le_zss,
{ gen_helper_sve_ldbdu_zss, NULL, },
gen_helper_sve_ldhdu_zss, { gen_helper_sve_ldbdu_zss,
gen_helper_sve_ldsdu_zss, gen_helper_sve_ldhdu_le_zss,
gen_helper_sve_ldddu_zss, } }, gen_helper_sve_ldsdu_le_zss,
{ { gen_helper_sve_ldbds_zd, gen_helper_sve_lddd_le_zss, } },
gen_helper_sve_ldhds_zd, { { gen_helper_sve_ldbds_zd,
gen_helper_sve_ldsds_zd, gen_helper_sve_ldhds_le_zd,
NULL, }, gen_helper_sve_ldsds_le_zd,
{ gen_helper_sve_ldbdu_zd, NULL, },
gen_helper_sve_ldhdu_zd, { gen_helper_sve_ldbdu_zd,
gen_helper_sve_ldsdu_zd, gen_helper_sve_ldhdu_le_zd,
gen_helper_sve_ldddu_zd, } } }, gen_helper_sve_ldsdu_le_zd,
gen_helper_sve_lddd_le_zd, } } },
{ { { gen_helper_sve_ldffbds_zsu,
gen_helper_sve_ldffhds_zsu, /* First-fault */
gen_helper_sve_ldffsds_zsu, { { { gen_helper_sve_ldffbds_zsu,
NULL, }, gen_helper_sve_ldffhds_zsu,
{ gen_helper_sve_ldffbdu_zsu, gen_helper_sve_ldffsds_zsu,
gen_helper_sve_ldffhdu_zsu, NULL, },
gen_helper_sve_ldffsdu_zsu, { gen_helper_sve_ldffbdu_zsu,
gen_helper_sve_ldffddu_zsu, } }, gen_helper_sve_ldffhdu_zsu,
{ { gen_helper_sve_ldffbds_zss, gen_helper_sve_ldffsdu_zsu,
gen_helper_sve_ldffhds_zss, gen_helper_sve_ldffddu_zsu, } },
gen_helper_sve_ldffsds_zss, { { gen_helper_sve_ldffbds_zss,
NULL, }, gen_helper_sve_ldffhds_zss,
{ gen_helper_sve_ldffbdu_zss, gen_helper_sve_ldffsds_zss,
gen_helper_sve_ldffhdu_zss, NULL, },
gen_helper_sve_ldffsdu_zss, { gen_helper_sve_ldffbdu_zss,
gen_helper_sve_ldffddu_zss, } }, gen_helper_sve_ldffhdu_zss,
{ { gen_helper_sve_ldffbds_zd, gen_helper_sve_ldffsdu_zss,
gen_helper_sve_ldffhds_zd, gen_helper_sve_ldffddu_zss, } },
gen_helper_sve_ldffsds_zd, { { gen_helper_sve_ldffbds_zd,
NULL, }, gen_helper_sve_ldffhds_zd,
{ gen_helper_sve_ldffbdu_zd, gen_helper_sve_ldffsds_zd,
gen_helper_sve_ldffhdu_zd, NULL, },
gen_helper_sve_ldffsdu_zd, { gen_helper_sve_ldffbdu_zd,
gen_helper_sve_ldffddu_zd, } } } gen_helper_sve_ldffhdu_zd,
gen_helper_sve_ldffsdu_zd,
gen_helper_sve_ldffddu_zd, } } } },
/* Big-endian */
{ { { { gen_helper_sve_ldbds_zsu,
gen_helper_sve_ldhds_be_zsu,
gen_helper_sve_ldsds_be_zsu,
NULL, },
{ gen_helper_sve_ldbdu_zsu,
gen_helper_sve_ldhdu_be_zsu,
gen_helper_sve_ldsdu_be_zsu,
gen_helper_sve_lddd_be_zsu, } },
{ { gen_helper_sve_ldbds_zss,
gen_helper_sve_ldhds_be_zss,
gen_helper_sve_ldsds_be_zss,
NULL, },
{ gen_helper_sve_ldbdu_zss,
gen_helper_sve_ldhdu_be_zss,
gen_helper_sve_ldsdu_be_zss,
gen_helper_sve_lddd_be_zss, } },
{ { gen_helper_sve_ldbds_zd,
gen_helper_sve_ldhds_be_zd,
gen_helper_sve_ldsds_be_zd,
NULL, },
{ gen_helper_sve_ldbdu_zd,
gen_helper_sve_ldhdu_be_zd,
gen_helper_sve_ldsdu_be_zd,
gen_helper_sve_lddd_be_zd, } } },
/* First-fault */
{ { { gen_helper_sve_ldffbds_zsu,
gen_helper_sve_ldffhds_zsu,
gen_helper_sve_ldffsds_zsu,
NULL, },
{ gen_helper_sve_ldffbdu_zsu,
gen_helper_sve_ldffhdu_zsu,
gen_helper_sve_ldffsdu_zsu,
gen_helper_sve_ldffddu_zsu, } },
{ { gen_helper_sve_ldffbds_zss,
gen_helper_sve_ldffhds_zss,
gen_helper_sve_ldffsds_zss,
NULL, },
{ gen_helper_sve_ldffbdu_zss,
gen_helper_sve_ldffhdu_zss,
gen_helper_sve_ldffsdu_zss,
gen_helper_sve_ldffddu_zss, } },
{ { gen_helper_sve_ldffbds_zd,
gen_helper_sve_ldffhds_zd,
gen_helper_sve_ldffsds_zd,
NULL, },
{ gen_helper_sve_ldffbdu_zd,
gen_helper_sve_ldffhdu_zd,
gen_helper_sve_ldffsdu_zd,
gen_helper_sve_ldffddu_zd, } } } },
}; };
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
{ {
gen_helper_gvec_mem_scatter *fn = NULL; gen_helper_gvec_mem_scatter *fn = NULL;
int be = s->be_data == MO_BE;
if (!sve_access_check(s)) { if (!sve_access_check(s)) {
return true; return true;
...@@ -5169,10 +5254,10 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) ...@@ -5169,10 +5254,10 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
switch (a->esz) { switch (a->esz) {
case MO_32: case MO_32:
fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz]; fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
break; break;
case MO_64: case MO_64:
fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz]; fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
break; break;
} }
assert(fn != NULL); assert(fn != NULL);
...@@ -5185,6 +5270,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn) ...@@ -5185,6 +5270,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
{ {
gen_helper_gvec_mem_scatter *fn = NULL; gen_helper_gvec_mem_scatter *fn = NULL;
int be = s->be_data == MO_BE;
TCGv_i64 imm; TCGv_i64 imm;
if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
...@@ -5196,10 +5282,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) ...@@ -5196,10 +5282,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
switch (a->esz) { switch (a->esz) {
case MO_32: case MO_32:
fn = gather_load_fn32[a->ff][0][a->u][a->msz]; fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
break; break;
case MO_64: case MO_64:
fn = gather_load_fn64[a->ff][2][a->u][a->msz]; fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
break; break;
} }
assert(fn != NULL); assert(fn != NULL);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册