提交 f1064f61 编写于 作者: T Tom Musta 提交者: Alexander Graf

target-ppc: Altivec 2.07: Vector Gather Bits by Bytes

This patch adds the Vector Gather Bits by Bytes Doubleword (vgbbd)
instruction which is introduced in Power ISA Version 2.07.
Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
上级 6f3dab41
......@@ -310,6 +310,7 @@ DEF_HELPER_2(vpopcnth, void, avr, avr)
DEF_HELPER_2(vpopcntw, void, avr, avr)
DEF_HELPER_2(vpopcntd, void, avr, avr)
DEF_HELPER_3(vbpermq, void, avr, avr, avr)
DEF_HELPER_2(vgbbd, void, avr, avr)
DEF_HELPER_2(xsadddp, void, env, i32)
DEF_HELPER_2(xssubdp, void, env, i32)
......
......@@ -1075,6 +1075,282 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
#undef VBPERMQ_INDEX
#undef VBPERMQ_DW
/*
 * Lookup table used by helper_vgbbd().
 *
 * VGBBD_MASKS[v] spreads the eight bits of the byte value v across a 64-bit
 * word, one bit per byte: bit k of v (k = 0 being the least significant bit)
 * is copied to the most significant bit of byte k of the entry, i.e. to bit
 * position 8 * k + 7.  For example VGBBD_MASKS[0x81] is 0x8000000000000080.
 *
 * The table is only ever read, so it is const, and it is private to this
 * translation unit, so it is static.
 */
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};
/*
 * Helper for the Vector Gather Bits by Bytes by Doubleword (vgbbd)
 * instruction.
 *
 * r: destination vector; both of its u64 elements are overwritten.
 * b: source vector, read one u8 element at a time.
 *
 * Each source byte is expanded through VGBBD_MASKS into a 64-bit word that
 * carries one bit of the byte in the top bit of each of its eight bytes.
 * That word is shifted right by a per-element amount and OR-ed into one of
 * two 64-bit accumulators.
 */
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    /*
     * Build the result in locals and store it only at the end, so the
     * source is fully read before r is written (r and b may be the same
     * register).
     */
    uint64_t gathered[2] = { 0, 0 };
    int idx;

    /* NOTE(review): VECTOR_FOR_INORDER_I is defined elsewhere; presumably it
     * walks idx over every u8 element index of the vector -- confirm. */
    VECTOR_FOR_INORDER_I(idx, u8) {
        /* Position of this element within its group of eight. */
        const int lane = idx & 7;
#if defined(HOST_WORDS_BIGENDIAN)
        const int shift = lane;
#else
        /* Element numbering is mirrored on little-endian hosts. */
        const int shift = 7 - lane;
#endif
        /* idx >> 3 selects the accumulator: 0 for idx 0..7, 1 for 8..15. */
        gathered[idx >> 3] |= VGBBD_MASKS[b->u8[idx]] >> shift;
    }

    r->u64[0] = gathered[0];
    r->u64[1] = gathered[1];
}
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
......
......@@ -7371,6 +7371,7 @@ GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \
GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vbpermq, 6, 21);
GEN_VXFORM_NOA(vgbbd, 6, 20);
/*** VSX extension ***/
......@@ -10621,6 +10622,7 @@ GEN_VXFORM_DUAL(vclzw, vpopcntw, 1, 30, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_207(vbpermq, 6, 21),
GEN_VXFORM_207(vgbbd, 6, 20),
GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册