提交 4e624eda 编写于 作者: P Peter Maydell

target-arm: add support for v8 VMULL.P64 instruction

Add support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
instruction in the A32/T32 instruction sets; this is part of the v8
Crypto Extensions.

To do this we have to move the neon_pmull_64_{lo,hi} helpers from
helper-a64.c into neon_helper.c so they can be used by the AArch32
translator.
Inspired-by: NArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: NPeter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-4-git-send-email-peter.maydell@linaro.org
上级 526d0096
...@@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void) ...@@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void)
uint32_t hwcaps = 0; uint32_t hwcaps = 0;
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
......
...@@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) ...@@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_V8_AES); set_feature(env, ARM_FEATURE_V8_AES);
set_feature(env, ARM_FEATURE_V8_SHA1); set_feature(env, ARM_FEATURE_V8_SHA1);
set_feature(env, ARM_FEATURE_V8_SHA256); set_feature(env, ARM_FEATURE_V8_SHA256);
set_feature(env, ARM_FEATURE_V8_PMULL);
} }
if (arm_feature(env, ARM_FEATURE_V7)) { if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA); set_feature(env, ARM_FEATURE_VAPA);
......
...@@ -637,6 +637,7 @@ enum arm_features { ...@@ -637,6 +637,7 @@ enum arm_features {
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
}; };
static inline int arm_feature(CPUARMState *env, int feature) static inline int arm_feature(CPUARMState *env, int feature)
......
...@@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, ...@@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result; return result;
} }
/* Helper function for 64 bit polynomial multiply case:
* perform PolynomialMult(op1, op2) and return either the top or
* bottom half of the 128 bit result.
*/
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
for (bitnum = 0; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 << bitnum;
}
}
return res;
}
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
/* bit 0 of op1 can't influence the high 64 bits at all */
for (bitnum = 1; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 >> (64 - bitnum);
}
}
return res;
}
/* 64bit/double versions of the neon float compare functions */ /* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{ {
......
...@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) ...@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
......
...@@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) ...@@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64) DEF_HELPER_2(dc_zva, void, env, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
#ifdef TARGET_AARCH64 #ifdef TARGET_AARCH64
#include "helper-a64.h" #include "helper-a64.h"
#endif #endif
...@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) ...@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
env->vfp.regs[rm] = make_float64(m0); env->vfp.regs[rm] = make_float64(m0);
env->vfp.regs[rd] = make_float64(d0); env->vfp.regs[rd] = make_float64(d0);
} }
/* Helper function for 64 bit polynomial multiply case:
* perform PolynomialMult(op1, op2) and return either the top or
* bottom half of the 128 bit result.
*/
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
for (bitnum = 0; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 << bitnum;
}
}
return res;
}
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
/* bit 0 of op1 can't influence the high 64 bits at all */
for (bitnum = 1; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 >> (64 - bitnum);
}
}
return res;
}
...@@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins ...@@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
{0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */ {0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 1}, /* VQDMULL */ {0, 0, 0, 1}, /* VQDMULL */
{0, 0, 0, 15}, /* Polynomial VMULL */ {0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */ {0, 0, 0, 7}, /* Reserved: always UNDEF */
}; };
...@@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins ...@@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
return 1; return 1;
} }
/* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
* outside the loop below as it only performs a single pass.
*/
if (op == 14 && size == 2) {
TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
return 1;
}
tcg_rn = tcg_temp_new_i64();
tcg_rm = tcg_temp_new_i64();
tcg_rd = tcg_temp_new_i64();
neon_load_reg64(tcg_rn, rn);
neon_load_reg64(tcg_rm, rm);
gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd);
gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd + 1);
tcg_temp_free_i64(tcg_rn);
tcg_temp_free_i64(tcg_rm);
tcg_temp_free_i64(tcg_rd);
return 0;
}
/* Avoid overlapping operands. Wide source operands are /* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide always aligned so will never overlap with wide
destinations in problematic ways. */ destinations in problematic ways. */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册