提交 8e18cde3 编写于 作者: P Peter Maydell 提交者: Aurelien Jarno

target-arm: Fix VLD of single element to all lanes

Fix several bugs in VLD of single element to all lanes:

The "single element to all lanes" form of VLD1 differs from those for
VLD2, VLD3 and VLD4 in that bit 5 indicates whether the loaded element
should be written to one or two Dregs (rather than being a register
stride). Handle this by special-casing VLD1 rather than trying to
have one loop which deals with both VLD1 and 2/3/4.

Handle VLD4.32 with 16 byte alignment specified, rather than UNDEFfing.

UNDEF for the invalid size and alignment combinations.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
上级 ac60cc18
...@@ -2648,6 +2648,28 @@ static void gen_neon_dup_high16(TCGv var) ...@@ -2648,6 +2648,28 @@ static void gen_neon_dup_high16(TCGv var)
tcg_temp_free_i32(tmp); tcg_temp_free_i32(tmp);
} }
static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
{
    /* Fetch one Neon element from addr and duplicate it so that it fills
     * a 32 bit TCG register.
     *
     * size selects the element width: 0 is a byte load, 1 a halfword
     * load and 2 a word load (which already fills the register, so no
     * duplication step is needed). Any other value is a caller bug and
     * aborts.
     *
     * Returns the temporary holding the replicated value; addr itself is
     * not advanced here.
     */
    TCGv elem;

    if (size == 0) {
        elem = gen_ld8u(addr, IS_USER(s));
        gen_neon_dup_u8(elem, 0);
    } else if (size == 1) {
        elem = gen_ld16u(addr, IS_USER(s));
        gen_neon_dup_low16(elem);
    } else if (size == 2) {
        elem = gen_ld32(addr, IS_USER(s));
    } else {
        /* Unreachable for valid callers; also keeps the compiler quiet. */
        abort();
    }
    return elem;
}
/* Disassemble a VFP instruction. Returns nonzero if an error occurred /* Disassemble a VFP instruction. Returns nonzero if an error occurred
(ie. an undefined instruction). */ (ie. an undefined instruction). */
static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn) static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
...@@ -3890,36 +3912,48 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn) ...@@ -3890,36 +3912,48 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
size = (insn >> 10) & 3; size = (insn >> 10) & 3;
if (size == 3) { if (size == 3) {
/* Load single element to all lanes. */ /* Load single element to all lanes. */
if (!load) int a = (insn >> 4) & 1;
if (!load) {
return 1; return 1;
}
size = (insn >> 6) & 3; size = (insn >> 6) & 3;
nregs = ((insn >> 8) & 3) + 1; nregs = ((insn >> 8) & 3) + 1;
stride = (insn & (1 << 5)) ? 2 : 1;
load_reg_var(s, addr, rn); if (size == 3) {
for (reg = 0; reg < nregs; reg++) { if (nregs != 4 || a == 0) {
switch (size) {
case 0:
tmp = gen_ld8u(addr, IS_USER(s));
gen_neon_dup_u8(tmp, 0);
break;
case 1:
tmp = gen_ld16u(addr, IS_USER(s));
gen_neon_dup_low16(tmp);
break;
case 2:
tmp = gen_ld32(addr, IS_USER(s));
break;
case 3:
return 1; return 1;
default: /* Avoid compiler warnings. */
abort();
} }
tcg_gen_addi_i32(addr, addr, 1 << size); /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
tmp2 = tcg_temp_new_i32(); size = 2;
tcg_gen_mov_i32(tmp2, tmp); }
neon_store_reg(rd, 0, tmp2); if (nregs == 1 && a == 1 && size == 0) {
neon_store_reg(rd, 1, tmp); return 1;
rd += stride; }
if (nregs == 3 && a == 1) {
return 1;
}
load_reg_var(s, addr, rn);
if (nregs == 1) {
/* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
tmp = gen_load_and_replicate(s, addr, size);
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
if (insn & (1 << 5)) {
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
}
tcg_temp_free_i32(tmp);
} else {
/* VLD2/3/4 to all lanes: bit 5 indicates register stride */
stride = (insn & (1 << 5)) ? 2 : 1;
for (reg = 0; reg < nregs; reg++) {
tmp = gen_load_and_replicate(s, addr, size);
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
tcg_temp_free_i32(tmp);
tcg_gen_addi_i32(addr, addr, 1 << size);
rd += stride;
}
} }
stride = (1 << size) * nregs; stride = (1 << size) * nregs;
} else { } else {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册