提交 bbd0968c 编写于 作者: Richard Henderson 提交者: Peter Maydell

target/arm: Reorganize SVE WHILE

The pseudocode for this operation is an increment + compare loop,
so comparing <= the maximum integer produces an all-true predicate.

Rather than bound in both the inline code and the helper, pass the
helper the number of predicate bits to set instead of the number
of predicate elements to set.
Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Message-id: 20180801123111.3595-4-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
上级 7a31e0c6
...@@ -2846,11 +2846,6 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc) ...@@ -2846,11 +2846,6 @@ uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
return flags; return flags;
} }
/* Scale from predicate element count to bits. */
count <<= esz;
/* Bound to the bits in the predicate. */
count = MIN(count, oprsz * 8);
/* Set all of the requested bits. */ /* Set all of the requested bits. */
for (i = 0; i < count / 64; ++i) { for (i = 0; i < count / 64; ++i) {
d->p[i] = esz_mask; d->p[i] = esz_mask;
......
...@@ -3173,19 +3173,19 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn) ...@@ -3173,19 +3173,19 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn) static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{ {
if (!sve_access_check(s)) { TCGv_i64 op0, op1, t0, t1, tmax;
return true;
}
TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
TCGv_i32 t2, t3; TCGv_i32 t2, t3;
TCGv_ptr ptr; TCGv_ptr ptr;
unsigned desc, vsz = vec_full_reg_size(s); unsigned desc, vsz = vec_full_reg_size(s);
TCGCond cond; TCGCond cond;
if (!sve_access_check(s)) {
return true;
}
op0 = read_cpu_reg(s, a->rn, 1);
op1 = read_cpu_reg(s, a->rm, 1);
if (!a->sf) { if (!a->sf) {
if (a->u) { if (a->u) {
tcg_gen_ext32u_i64(op0, op0); tcg_gen_ext32u_i64(op0, op0);
...@@ -3198,32 +3198,47 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn) ...@@ -3198,32 +3198,47 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
/* For the helper, compress the different conditions into a computation /* For the helper, compress the different conditions into a computation
* of how many iterations for which the condition is true. * of how many iterations for which the condition is true.
*
* This is slightly complicated by 0 <= UINT64_MAX, which is nominally
* 2**64 iterations, overflowing to 0. Of course, predicate registers
* aren't that large, so any value >= predicate size is sufficient.
*/ */
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i64();
tcg_gen_sub_i64(t0, op1, op0); tcg_gen_sub_i64(t0, op1, op0);
/* t0 = MIN(op1 - op0, vsz). */ tmax = tcg_const_i64(vsz >> a->esz);
tcg_gen_movi_i64(t1, vsz);
tcg_gen_umin_i64(t0, t0, t1);
if (a->eq) { if (a->eq) {
/* Equality means one more iteration. */ /* Equality means one more iteration. */
tcg_gen_addi_i64(t0, t0, 1); tcg_gen_addi_i64(t0, t0, 1);
/* If op1 is max (un)signed integer (and the only time the addition
* above could overflow), then we produce an all-true predicate by
* setting the count to the vector length. This is because the
* pseudocode is described as an increment + compare loop, and the
* max integer would always compare true.
*/
tcg_gen_movi_i64(t1, (a->sf
? (a->u ? UINT64_MAX : INT64_MAX)
: (a->u ? UINT32_MAX : INT32_MAX)));
tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
} }
/* t0 = (condition true ? t0 : 0). */ /* Bound to the maximum. */
tcg_gen_umin_i64(t0, t0, tmax);
tcg_temp_free_i64(tmax);
/* Set the count to zero if the condition is false. */
cond = (a->u cond = (a->u
? (a->eq ? TCG_COND_LEU : TCG_COND_LTU) ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
: (a->eq ? TCG_COND_LE : TCG_COND_LT)); : (a->eq ? TCG_COND_LE : TCG_COND_LT));
tcg_gen_movi_i64(t1, 0); tcg_gen_movi_i64(t1, 0);
tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
tcg_temp_free_i64(t1);
/* Since we're bounded, pass as a 32-bit type. */
t2 = tcg_temp_new_i32(); t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, t0); tcg_gen_extrl_i64_i32(t2, t0);
tcg_temp_free_i64(t0); tcg_temp_free_i64(t0);
tcg_temp_free_i64(t1);
/* Scale elements to bits. */
tcg_gen_shli_i32(t2, t2, a->esz);
desc = (vsz / 8) - 2; desc = (vsz / 8) - 2;
desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册