diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 33ff422542c4933a6d7876fc3ee3c778817f9695..f311595d6756555212582295745d5a044a1b75c0 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -28,6 +28,7 @@
 #include "net/net.h"
 #include "sysemu/sysemu.h"
 #include "hw/boards.h"
+#include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "sysemu/blockdev.h"
 #include "hw/block/flash.h"
@@ -528,6 +529,18 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard,
     daughterboard->init(daughterboard, machine->ram_size, machine->cpu_model,
                         pic);
 
+    /*
+     * If a bios file was provided, attempt to map it into memory
+     */
+    if (bios_name) {
+        const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+        if (!fn || load_image_targphys(fn, map[VE_NORFLASH0],
+                                       VEXPRESS_FLASH_SIZE) < 0) {
+            error_report("Could not load ROM image '%s'", bios_name);
+            exit(1);
+        }
+    }
+
     /* Motherboard peripherals: the wiring is the same but the
      * addresses vary between the legacy and A-Series memory maps.
      */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 995f999768e00e3f544f59f3d168110f25bb98b6..68b979364907f7895e26239fd2f4484154e7944e 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -468,6 +468,9 @@ static uint32_t get_elf_hwcap2(void)
     uint32_t hwcaps = 0;
 
     GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
+    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
+    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
     GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
     return hwcaps;
 }
@@ -536,7 +539,11 @@ static uint32_t get_elf_hwcap(void)
     /* probe for the extra features */
 #define GET_FEATURE(feat, hwcap) \
     do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
-    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_PMULL);
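+    /* Each v8 crypto feature now reports its own hwcap bit; AES no
+     * longer stands in for the whole crypto extension.
+     */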
+    GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
+    GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
+    GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
+    GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
+    GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
 #undef GET_FEATURE
 
     return hwcaps;
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 794dcb9fb70102ab74b116db818447d6687765f2..b8778350f7e5bb14a786c0b632aca19a5ac5e007 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -316,7 +316,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V7);
         set_feature(env, ARM_FEATURE_ARM_DIV);
         set_feature(env, ARM_FEATURE_LPAE);
-        set_feature(env, ARM_FEATURE_V8_AES);
     }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         set_feature(env, ARM_FEATURE_VAPA);
@@ -349,6 +348,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
     if (arm_feature(env, ARM_FEATURE_VFP4)) {
         set_feature(env, ARM_FEATURE_VFP3);
+        set_feature(env, ARM_FEATURE_VFP_FP16);
     }
     if (arm_feature(env, ARM_FEATURE_VFP3)) {
         set_feature(env, ARM_FEATURE_VFP);
@@ -745,7 +745,6 @@ static void cortex_a15_initfn(Object *obj)
     cpu->dtb_compatible = "arm,cortex-a15";
     set_feature(&cpu->env, ARM_FEATURE_V7);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
@@ -954,15 +953,13 @@ static void arm_any_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_V7MP);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
     set_feature(&cpu->env, ARM_FEATURE_CRC);
-#ifdef TARGET_AARCH64
-    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
-#endif
     cpu->midr = 0xffffffff;
 }
 #endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7d8332e8be20ebf0fc87a7d6480aa15bcf414834..79e7f82515157a72b3548d12f1451d6c0f5fdbef 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -635,6 +635,9 @@ enum arm_features {
     ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
     ARM_FEATURE_EL2, /* has EL2 Virtualization support */
     ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
+    ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8daa622bdcee97612d75d4c92c33d7910d02887e..8b2081c246490fa6f6fa727cff938d410700d203 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -93,11 +93,15 @@ static void aarch64_a57_initfn(Object *obj)
 
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
     cpu->midr = 0x411fd070;
     cpu->reset_fpsid = 0x41034070;
@@ -128,6 +132,7 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
     cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
     cpu->dcz_blocksize = 4; /* 64 bytes */
+    define_arm_cp_regs(cpu, cortexa57_cp_reginfo);
 }
 
 #ifdef CONFIG_USER_ONLY
@@ -137,11 +142,13 @@ static void aarch64_any_initfn(Object *obj)
 
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
-    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
-    set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+    set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+    set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
     cpu->dcz_blocksize = 7; /* 512 bytes */
 }
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index d8898ed8059f8eee4aa6bee3974337b40380350e..3e4b5f7c48080004d3d8a5a02ee6013e6cb4fbe4 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -1,7 +1,7 @@
 /*
  * crypto_helper.c - emulate v8 Crypto Extensions instructions
  *
- * Copyright (C) 2013 Linaro Ltd
+ * Copyright (C) 2013 - 2014 Linaro Ltd
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -15,9 +15,9 @@
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
-union AES_STATE {
+union CRYPTO_STATE {
     uint8_t bytes[16];
-    uint32_t cols[4];
+    uint32_t words[4];
     uint64_t l[2];
 };
 
@@ -99,11 +99,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm,
         /* ShiftRows permutation vector for decryption */
         { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 },
     };
-    union AES_STATE rk = { .l = {
+    union CRYPTO_STATE rk = { .l = {
         float64_val(env->vfp.regs[rm]),
         float64_val(env->vfp.regs[rm + 1])
     } };
-    union AES_STATE st = { .l = {
+    union CRYPTO_STATE st = { .l = {
         float64_val(env->vfp.regs[rd]),
         float64_val(env->vfp.regs[rd + 1])
     } };
@@ -260,7 +260,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
     } };
-    union AES_STATE st = { .l = {
+    union CRYPTO_STATE st = { .l = {
         float64_val(env->vfp.regs[rm]),
         float64_val(env->vfp.regs[rm + 1])
     } };
@@ -269,7 +269,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
     assert(decrypt < 2);
 
     for (i = 0; i < 16; i += 4) {
-        st.cols[i >> 2] = cpu_to_le32(
+        st.words[i >> 2] = cpu_to_le32(
             mc[decrypt][st.bytes[i]] ^
             rol32(mc[decrypt][st.bytes[i + 1]], 8) ^
             rol32(mc[decrypt][st.bytes[i + 2]], 16) ^
@@ -279,3 +279,246 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
     env->vfp.regs[rd] = make_float64(st.l[0]);
     env->vfp.regs[rd + 1] = make_float64(st.l[1]);
 }
+
+/*
+ * SHA-1 logical functions
+ */
+
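+/* cho, par and maj are the Ch(), Parity() and Maj() functions from
+ * FIPS 180-4; the sha1c, sha1p and sha1m ops below select between them.
+ */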
+static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & (y ^ z)) ^ z;
+}
+
+static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
+{
+    return x ^ y ^ z;
+}
+
+static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
+{
+    return (x & y) | ((x | y) & z);
+}
+
+void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                              uint32_t rm, uint32_t op)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    if (op == 3) { /* sha1su0 */
+        d.l[0] ^= d.l[1] ^ m.l[0];
+        d.l[1] ^= n.l[0] ^ m.l[1];
+    } else {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            uint32_t t;
+
+            switch (op) {
+            case 0: /* sha1c */
+                t = cho(d.words[1], d.words[2], d.words[3]);
+                break;
+            case 1: /* sha1p */
+                t = par(d.words[1], d.words[2], d.words[3]);
+                break;
+            case 2: /* sha1m */
+                t = maj(d.words[1], d.words[2], d.words[3]);
+                break;
+            default:
+                g_assert_not_reached();
+            }
+            t += rol32(d.words[0], 5) + n.words[0] + m.words[i];
+
+            n.words[0] = d.words[3];
+            d.words[3] = d.words[2];
+            d.words[2] = ror32(d.words[1], 2);
+            d.words[1] = d.words[0];
+            d.words[0] = t;
+        }
+    }
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
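+/* sha1h is ROL(x, 30) of the first word, written here as ror32(x, 2);
+ * the other three result words are zeroed.
+ */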
+void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    m.words[0] = ror32(m.words[0], 2);
+    m.words[1] = m.words[2] = m.words[3] = 0;
+
+    env->vfp.regs[rd] = make_float64(m.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(m.l[1]);
+}
+
+void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] = rol32(d.words[0] ^ m.words[1], 1);
+    d.words[1] = rol32(d.words[1] ^ m.words[2], 1);
+    d.words[2] = rol32(d.words[2] ^ m.words[3], 1);
+    d.words[3] = rol32(d.words[3] ^ d.words[0], 1);
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+/*
+ * The SHA-256 logical functions, according to
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ */
+
+static uint32_t S0(uint32_t x)
+{
+    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static uint32_t S1(uint32_t x)
+{
+    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+static uint32_t s0(uint32_t x)
+{
+    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static uint32_t s1(uint32_t x)
+{
+    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
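+/* sha256h performs four rounds of SHA-256: 'd' holds the {a,b,c,d} half of
+ * the state and 'n' the {e,f,g,h} half, while 'm' supplies the four
+ * per-round additive inputs (schedule words plus round constants).
+ */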
+void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                            uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3] +
+                     S1(n.words[0]) + m.words[i];
+
+        n.words[3] = n.words[2];
+        n.words[2] = n.words[1];
+        n.words[1] = n.words[0];
+        n.words[0] = d.words[3] + t;
+
+        t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]);
+
+        d.words[3] = d.words[2];
+        d.words[2] = d.words[1];
+        d.words[1] = d.words[0];
+        d.words[0] = t;
+    }
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                             uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3] +
+                     S1(d.words[0]) + m.words[i];
+
+        d.words[3] = d.words[2];
+        d.words[2] = d.words[1];
+        d.words[1] = d.words[0];
+        d.words[0] = n.words[3 - i] + t;
+    }
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] += s0(d.words[1]);
+    d.words[1] += s0(d.words[2]);
+    d.words[2] += s0(d.words[3]);
+    d.words[3] += s0(m.words[0]);
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
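+/* sha256su1 completes the message schedule update begun by sha256su0
+ * above, folding in the s1() terms and the remaining schedule words.
+ */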
+void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
+                              uint32_t rm)
+{
+    union CRYPTO_STATE d = { .l = {
+        float64_val(env->vfp.regs[rd]),
+        float64_val(env->vfp.regs[rd + 1])
+    } };
+    union CRYPTO_STATE n = { .l = {
+        float64_val(env->vfp.regs[rn]),
+        float64_val(env->vfp.regs[rn + 1])
+    } };
+    union CRYPTO_STATE m = { .l = {
+        float64_val(env->vfp.regs[rm]),
+        float64_val(env->vfp.regs[rm + 1])
+    } };
+
+    d.words[0] += s1(m.words[2]) + n.words[1];
+    d.words[1] += s1(m.words[3]) + n.words[2];
+    d.words[2] += s1(d.words[0]) + n.words[3];
+    d.words[3] += s1(d.words[1]) + m.words[0];
+
+    env->vfp.regs[rd] = make_float64(d.l[0]);
+    env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index cccda741135478dc2f84289ab61ff94d628990f9..2b4ce6ac60bdc748ea4763a731fe80e1a9539ef1 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -24,6 +24,8 @@
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
 #include "internals.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
 
 /* C2.4.7 Multiply and divide */
 /* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -186,36 +188,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
     return result;
 }
 
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
-    int bitnum;
-    uint64_t res = 0;
-
-    for (bitnum = 0; bitnum < 64; bitnum++) {
-        if (op1 & (1ULL << bitnum)) {
-            res ^= op2 << bitnum;
-        }
-    }
-    return res;
-}
-
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
-    int bitnum;
-    uint64_t res = 0;
-
-    /* bit 0 of op1 can't influence the high 64 bits at all */
-    for (bitnum = 1; bitnum < 64; bitnum++) {
-        if (op1 & (1ULL << bitnum)) {
-            res ^= op2 >> (64 - bitnum);
-        }
-    }
-    return res;
-}
-
 /* 64bit/double versions of the neon float compare functions */
 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
 {
@@ -438,6 +410,34 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
     return r;
 }
 
+/* 64-bit versions of the CRC helpers. Note that although the operation
+ * (and the prototypes of crc32c() and crc32()) means that only the bottom
+ * 32 bits of the accumulator and result are used, we pass and return
+ * uint64_t for convenience of the generated code. Unlike the 32-bit
+ * instruction set versions, val may genuinely have 64 bits of data in it.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+    uint8_t buf[8];
+
+    stq_le_p(buf, val);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+    uint8_t buf[8];
+
+    stq_le_p(buf, val);
+
+    /* Linux crc32c converts the output to one's complement.  */
+    return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
+
 /* Handle a CPU exception.  */
 void aarch64_cpu_do_interrupt(CPUState *cs)
 {
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 3f05bedcca8b413fc4144601b999801bb97dca2a..1d3d10fffbce8dcd4ff9ffb0d609d515c4987b67 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
 DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
 DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
 DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
 DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
 DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@@ -46,3 +44,5 @@ DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
+DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 95af6241265323cde87b8d644faa785fe696d1ec..050c40981b21e40a7cf5d4ee5fc4f6f65fe294a5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -319,7 +319,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    env->cp15.c3 = value;
+    raw_write(env, ri, value);
     tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */
 }
@@ -327,12 +327,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.c13_fcse != value) {
+    if (raw_read(env, ri) != value) {
         /* Unlike real hardware the qemu TLB uses virtual addresses,
          * not modified virtual addresses, so this causes a TLB flush.
          */
         tlb_flush(CPU(cpu), 1);
-        env->cp15.c13_fcse = value;
+        raw_write(env, ri, value);
     }
 }
@@ -341,7 +341,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
+    if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU)
         && !extended_addresses_enabled(env)) {
         /* For VMSA (when not using the LPAE long descriptor page table
          * format) this register includes the ASID, so do a TLB flush.
@@ -349,7 +349,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
          */
         tlb_flush(CPU(cpu), 1);
     }
-    env->cp15.contextidr_el1 = value;
+    raw_write(env, ri, value);
 }
 
 static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -693,7 +693,7 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
 {
-    env->cp15.c0_cssel = value & 0xf;
+    raw_write(env, ri, value & 0xf);
 }
 
 static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1040,16 +1040,16 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
     int timeridx = ri->crm & 1;
     uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
 
-    env->cp15.c14_timer[timeridx].ctl = value & 3;
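+    /* Only the ENABLE (bit 0) and IMASK (bit 1) fields are writable here;
+     * deposit64() preserves the read-only ISTATUS bit (bit 2).
+     */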
+    env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value);
     if ((oldval ^ value) & 1) {
         /* Enable toggled */
         gt_recalc_timer(cpu, timeridx);
-    } else if ((oldval & value) & 2) {
+    } else if ((oldval ^ value) & 2) {
         /* IMASK toggled: don't need to recalculate,
          * just set the interrupt line based on ISTATUS
          */
         qemu_set_irq(cpu->gt_timer_outputs[timeridx],
-                     (oldval & 4) && (value & 2));
+                     (oldval & 4) && !(value & 2));
     }
 }
 
@@ -1216,11 +1216,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 
 static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
-        env->cp15.par_el1 = value;
+        raw_write(env, ri, value);
     } else if (arm_feature(env, ARM_FEATURE_V7)) {
-        env->cp15.par_el1 = value & 0xfffff6ff;
+        raw_write(env, ri, value & 0xfffff6ff);
     } else {
-        env->cp15.par_el1 = value & 0xfffff1ff;
+        raw_write(env, ri, value & 0xfffff1ff);
     }
 }
@@ -1423,7 +1423,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
      * for long-descriptor tables the TTBCR fields are used differently
      * and the c2_mask and c2_base_mask values are meaningless.
      */
-    env->cp15.c2_control = value;
+    raw_write(env, ri, value);
     env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
     env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
 }
@@ -1445,7 +1445,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     env->cp15.c2_base_mask = 0xffffc000u;
-    env->cp15.c2_control = 0;
+    raw_write(env, ri, 0);
     env->cp15.c2_mask = 0;
 }
@@ -1456,7 +1456,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
     /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
     tlb_flush(CPU(cpu), 1);
-    env->cp15.c2_control = value;
+    raw_write(env, ri, value);
 }
 
 static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2151,14 +2151,14 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
-    if (env->cp15.c1_sys == value) {
+    if (raw_read(env, ri) == value) {
         /* Skip the TLB flush if nothing actually changed; Linux likes
         * to do a lot of pointless SCTLR writes.
         */
        return;
     }
 
-    env->cp15.c1_sys = value;
+    raw_write(env, ri, value);
     /* ??? Lots of these bits are not implemented.  */
     /* This may enable/disable the MMU, so do a TLB flush.  */
     tlb_flush(CPU(cpu), 1);
@@ -3929,13 +3929,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     page_size = (1 << ((granule_sz * (4 - level)) + 3));
     descaddr |= (address & (page_size - 1));
     /* Extract attributes from the descriptor and merge with table attrs */
-    if (arm_feature(env, ARM_FEATURE_V8)) {
-        attrs = extract64(descriptor, 2, 10)
-            | (extract64(descriptor, 53, 11) << 10);
-    } else {
-        attrs = extract64(descriptor, 2, 10)
-            | (extract64(descriptor, 52, 12) << 10);
-    }
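+    /* attrs bit 10 is descriptor bit 52 (Contiguous), bit 11 is PXN and
+     * bit 12 is XN/UXN, so v8 shares the v7 LPAE attribute layout.
+     */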
+    attrs = extract64(descriptor, 2, 10)
+        | (extract64(descriptor, 52, 12) << 10);
     attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
     attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
     /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3961,8 +3956,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         goto do_fault;
     }
     *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
-    if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) {
-        /* XN or PXN */
+    if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) ||
+        (!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) ||
+        (!is_user && (attrs & (1 << 11)))) {
+        /* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally
+         * treat XN/UXN as UXN for v8.
+         */
         if (access_type == 2) {
             goto do_fault;
         }
@@ -5560,28 +5559,15 @@ int arm_rmode_to_sf(int rmode)
     return rmode;
 }
 
-static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
-{
-    memset(buf, 0, 4);
-
-    if (bytes == 1) {
-        buf[0] = val & 0xff;
-    } else if (bytes == 2) {
-        buf[0] = val & 0xff;
-        buf[1] = (val >> 8) & 0xff;
-    } else {
-        buf[0] = val & 0xff;
-        buf[1] = (val >> 8) & 0xff;
-        buf[2] = (val >> 16) & 0xff;
-        buf[3] = (val >> 24) & 0xff;
-    }
-}
-
+/* CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
 uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
 {
     uint8_t buf[4];
 
-    crc_init_buffer(buf, val, bytes);
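+    /* stl_le_p() stores val little-endian, so buf[] holds the low 'bytes'
+     * bytes of val in ascending address order.
+     */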
+    stl_le_p(buf, val);
 
     /* zlib crc32 converts the accumulator and output to one's complement.  */
     return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
@@ -5591,7 +5577,7 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
 {
     uint8_t buf[4];
 
-    crc_init_buffer(buf, val, bytes);
+    stl_le_p(buf, val);
 
     /* Linux crc32c converts the output to one's complement.  */
     return crc32c(acc, buf, bytes) ^ 0xffffffff;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b63fd0ff1ca216159a2efc632d39dab2d246b590..facfcd2c112edeede0a88987d77259d644fc6fc6 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -456,8 +456,6 @@ DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
 DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
 DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
 
-DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
-
 DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
 DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
@@ -512,10 +510,22 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
 DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
 DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
+DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(crypto_sha1h, void, env, i32, i32)
+DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32)
+
+DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
+DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
+DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
+
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(dc_zva, void, env, i64)
 
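+/* The 64-bit polynomial multiply helpers are declared here rather than in
+ * helper-a64.h so that both the AArch32 VMULL.P64 and the AArch64 PMULL
+ * translations can use them.
+ */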
+DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index 398cbcbec7e444865dae436d228c4685f33c80bc..a5069144d19adfc76541d4100294e1e3e1dd2b19 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -369,15 +369,6 @@ IWMMXT_OP_AVGW(1)
 #undef IWMMXT_OP_AVGW
 #undef AVGW
 
-uint64_t HELPER(iwmmxt_msadb)(uint64_t a, uint64_t b)
-{
-    a = ((((a >> 0 ) & 0xffff) * ((b >> 0) & 0xffff) +
-          ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)) & 0xffffffff) |
-        ((((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) +
-          ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)) << 32);
-    return a;
-}
-
 uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n)
 {
     a >>= n << 3;
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 492e5007006b5a089725699d489663f423722a23..47d13e908c12f3334e8ede90691e3d9efa4f7c08 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
     env->vfp.regs[rm] = make_float64(m0);
     env->vfp.regs[rd] = make_float64(d0);
 }
+
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
+{
+    int bitnum;
+    uint64_t res = 0;
+
+    for (bitnum = 0; bitnum < 64; bitnum++) {
+        if (op1 & (1ULL << bitnum)) {
+            res ^= op2 << bitnum;
+        }
+    }
+    return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
+{
+    int bitnum;
+    uint64_t res = 0;
+
+    /* bit 0 of op1 can't influence the high 64 bits at all */
+    for (bitnum = 1; bitnum < 64; bitnum++) {
+        if (op1 & (1ULL << bitnum)) {
+            res ^= op2 >> (64 - bitnum);
+        }
+    }
+    return res;
+}
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a9c4633517b459aa46bb10a9d722e93ce35b2133..63ad787e9f4aed87154d02d0c89d22c22aec9de5 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -85,6 +85,8 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 
 /* initialize TCG globals.  */
 void a64_translate_init(void)
@@ -3774,6 +3776,54 @@ static void handle_shift_reg(DisasContext *s,
     tcg_temp_free_i64(tcg_shift);
 }
 
+/* CRC32[BHWX], CRC32C[BHWX] */
+static void handle_crc32(DisasContext *s,
+                         unsigned int sf, unsigned int sz, bool crc32c,
+                         unsigned int rm, unsigned int rn, unsigned int rd)
+{
+    TCGv_i64 tcg_acc, tcg_val;
+    TCGv_i32 tcg_bytes;
+
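+    /* CRC32X/CRC32CX are the only sf == 1 forms and require sz == 3;
+     * the byte/halfword/word forms are sf == 0 with sz == 0, 1 or 2.
+     */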
+    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+        || (sf == 1 && sz != 3)
+        || (sf == 0 && sz == 3)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (sz == 3) {
+        tcg_val = cpu_reg(s, rm);
+    } else {
+        uint64_t mask;
+        switch (sz) {
+        case 0:
+            mask = 0xFF;
+            break;
+        case 1:
+            mask = 0xFFFF;
+            break;
+        case 2:
+            mask = 0xFFFFFFFF;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        tcg_val = new_tmp_a64(s);
+        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
+    }
+
+    tcg_acc = cpu_reg(s, rn);
+    tcg_bytes = tcg_const_i32(1 << sz);
+
+    if (crc32c) {
+        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+    } else {
+        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+    }
+
+    tcg_temp_free_i32(tcg_bytes);
+}
+
 /* C3.5.8 Data-processing (2 source)
  *   31 30 29 28             21 20  16 15    10 9    5 4    0
  * +----+---+---+-----------------+------+--------+------+------+
@@ -3821,8 +3871,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
     case 21:
     case 22:
     case 23: /* CRC32 */
-        unsupported_encoding(s, insn);
+    {
+        int sz = extract32(opcode, 0, 2);
+        bool crc32c = extract32(opcode, 2, 1);
+        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
         break;
+    }
     default:
         unallocated_encoding(s);
         break;
@@ -8574,7 +8628,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             return;
         }
         if (size == 3) {
-            if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
+            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
                 unallocated_encoding(s);
                 return;
             }
@@ -10497,7 +10551,55 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
  */
 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    int decrypt;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
+    CryptoThreeOpEnvFn *genfn;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
+        || size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0x4: /* AESE */
+        decrypt = 0;
+        genfn = gen_helper_crypto_aese;
+        break;
+    case 0x6: /* AESMC */
+        decrypt = 0;
+        genfn = gen_helper_crypto_aesmc;
+        break;
+    case 0x5: /* AESD */
+        decrypt = 1;
+        genfn = gen_helper_crypto_aese;
+        break;
+    case 0x7: /* AESIMC */
+        decrypt = 1;
+        genfn = gen_helper_crypto_aesmc;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    /* Note that we convert the Vx register indexes into the
+     * index within the vfp.regs[] array, so we can share the
+     * helper with the AArch32 instructions.
+     */
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+    tcg_decrypt = tcg_const_i32(decrypt);
+
+    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
+    tcg_temp_free_i32(tcg_decrypt);
 }
 
 /* C3.6.20 Crypto three-reg SHA
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+--------+-----+------+------+
 */
@@ -10508,7 +10610,64 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 3);
+    int rm = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    CryptoThreeOpEnvFn *genfn;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
+    int feature = ARM_FEATURE_V8_SHA256;
+
+    if (size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0: /* SHA1C */
+    case 1: /* SHA1P */
+    case 2: /* SHA1M */
+    case 3: /* SHA1SU0 */
+        genfn = NULL;
+        feature = ARM_FEATURE_V8_SHA1;
+        break;
+    case 4: /* SHA256H */
+        genfn = gen_helper_crypto_sha256h;
+        break;
+    case 5: /* SHA256H2 */
+        genfn = gen_helper_crypto_sha256h2;
+        break;
+    case 6: /* SHA256SU1 */
+        genfn = gen_helper_crypto_sha256su1;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+    tcg_rm_regno = tcg_const_i32(rm << 1);
+
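+    /* genfn == NULL marks the SHA-1 cases, which share a single helper
+     * that takes the opcode as an extra argument.
+     */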
+    if (genfn) {
+        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
+    } else {
+        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
+
+        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
+                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
+        tcg_temp_free_i32(tcg_opcode);
+    }
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
+    tcg_temp_free_i32(tcg_rm_regno);
 }
 
 /* C3.6.21 Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
@@ -10519,7 +10678,49 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
 {
-    unsupported_encoding(s, insn);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    CryptoTwoOpEnvFn *genfn;
+    int feature;
+    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
+
+    if (size != 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    switch (opcode) {
+    case 0: /* SHA1H */
+        feature = ARM_FEATURE_V8_SHA1;
+        genfn = gen_helper_crypto_sha1h;
+        break;
+    case 1: /* SHA1SU1 */
+        feature = ARM_FEATURE_V8_SHA1;
+        genfn = gen_helper_crypto_sha1su1;
+        break;
+    case 2: /* SHA256SU0 */
+        feature = ARM_FEATURE_V8_SHA256;
+        genfn = gen_helper_crypto_sha256su0;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
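+    /* As with the AES group above, convert the Vd/Vn numbers into
+     * vfp.regs[] indexes so the helpers can be shared with AArch32.
+     */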
+    tcg_rd_regno = tcg_const_i32(rd << 1);
+    tcg_rn_regno = tcg_const_i32(rn << 1);
+
+    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
+
+    tcg_temp_free_i32(tcg_rd_regno);
+    tcg_temp_free_i32(tcg_rn_regno);
 }
 
 /* C3.6 Data processing - SIMD, inc Crypto
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d499caa562324cab03386d5dd1902443f8d64b19..cf4e767ff865204bd3e7306feaf00cb0632fc256 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -1382,8 +1382,6 @@ IWMMXT_OP_ENV(avgb1)
 IWMMXT_OP_ENV(avgw0)
 IWMMXT_OP_ENV(avgw1)
 
-IWMMXT_OP(msadb)
-
 IWMMXT_OP_ENV(packuw)
 IWMMXT_OP_ENV(packul)
 IWMMXT_OP_ENV(packuq)
@@ -4776,6 +4774,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_VPMIN 21
 #define NEON_3R_VQDMULH_VQRDMULH 22
 #define NEON_3R_VPADD 23
+#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
@@ -4809,6 +4808,7 @@ static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_VPMIN] = 0x7,
     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
     [NEON_3R_VPADD] = 0x7,
+    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
     [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
@@ -4842,6 +4842,7 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VCEQ0 18
 #define NEON_2RM_VCLE0 19
 #define NEON_2RM_VCLT0 20
+#define NEON_2RM_SHA1H 21
 #define NEON_2RM_VABS 22
 #define NEON_2RM_VNEG 23
 #define NEON_2RM_VCGT0_F 24
@@ -4858,6 +4859,7 @@ static const uint8_t neon_3r_sizes[] = {
 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
 #define NEON_2RM_VSHLL 38
+#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
 #define NEON_2RM_VRINTN 40
 #define NEON_2RM_VRINTX 41
 #define NEON_2RM_VRINTA 42
@@ -4918,6 +4920,7 @@ static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VCEQ0] = 0x7,
     [NEON_2RM_VCLE0] = 0x7,
     [NEON_2RM_VCLT0] = 0x7,
+    [NEON_2RM_SHA1H] = 0x4,
     [NEON_2RM_VABS] = 0x7,
     [NEON_2RM_VNEG] = 0x7,
     [NEON_2RM_VCGT0_F] = 0x4,
@@ -4934,6 +4937,7 @@ static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VMOVN] = 0x7,
     [NEON_2RM_VQMOVN] = 0x7,
     [NEON_2RM_VSHLL] = 0x7,
+    [NEON_2RM_SHA1SU1] = 0x4,
     [NEON_2RM_VRINTN] = 0x4,
     [NEON_2RM_VRINTX] = 0x4,
     [NEON_2RM_VRINTA] = 0x4,
@@ -5011,6 +5015,49 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
         if (q && ((rd | rn | rm) & 1)) {
             return 1;
         }
+        /*
+         * The SHA-1/SHA-256 3-register instructions require special treatment
+         * here, as their size field is overloaded as an op type selector, and
+         * they all consume their input in a single pass.
+         */
+        if (op == NEON_3R_SHA) {
+            if (!q) {
+                return 1;
+            }
+            if (!u) { /* SHA-1 */
+                if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rn);
+                tmp3 = tcg_const_i32(rm);
+                tmp4 = tcg_const_i32(size);
+                gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+                tcg_temp_free_i32(tmp4);
+            } else { /* SHA-256 */
+                if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rn);
+                tmp3 = tcg_const_i32(rm);
+                switch (size) {
+                case 0:
+                    gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                case 1:
+                    gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                case 2:
+                    gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+                    break;
+                }
+            }
+            tcg_temp_free_i32(tmp);
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp3);
+            return 0;
+        }
         if (size == 3 && op != NEON_3R_LOGIC) {
             /* 64-bit element instructions. */
             for (pass = 0; pass < (q ? 2 : 1); pass++) {
@@ -5905,10 +5952,11 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
         int src1_wide;
         int src2_wide;
         int prewiden;
-        /* undefreq: bit 0 : UNDEF if size != 0
-         * bit 1 : UNDEF if size == 0
-         * bit 2 : UNDEF if U == 1
-         * Note that [1:0] set implies 'always UNDEF'
+        /* undefreq: bit 0 : UNDEF if size == 0
+         * bit 1 : UNDEF if size == 1
+         * bit 2 : UNDEF if size == 2
+         * bit 3 : UNDEF if U == 1
+         * Note that [2:0] set implies 'always UNDEF'
          */
         int undefreq;
         /* prewiden, src1_wide, src2_wide, undefreq */
@@ -5922,13 +5970,13 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
             {0, 1, 1, 0}, /* VSUBHN */
             {0, 0, 0, 0}, /* VABDL */
             {0, 0, 0, 0}, /* VMLAL */
-            {0, 0, 0, 6}, /* VQDMLAL */
+            {0, 0, 0, 9}, /* VQDMLAL */
             {0, 0, 0, 0}, /* VMLSL */
-            {0, 0, 0, 6}, /* VQDMLSL */
+            {0, 0, 0, 9}, /* VQDMLSL */
             {0, 0, 0, 0}, /* Integer VMULL */
-            {0, 0, 0, 2}, /* VQDMULL */
-            {0, 0, 0, 5}, /* Polynomial VMULL */
-            {0, 0, 0, 3}, /* Reserved: always UNDEF */
+            {0, 0, 0, 1}, /* VQDMULL */
+            {0, 0, 0, 0xa}, /* Polynomial VMULL */
+            {0, 0, 0, 7}, /* Reserved: always UNDEF */
         };
 
         prewiden = neon_3reg_wide[op][0];
@@ -5936,9 +5984,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
         src2_wide = neon_3reg_wide[op][2];
         undefreq = neon_3reg_wide[op][3];
 
-        if (((undefreq & 1) && (size != 0)) ||
-            ((undefreq & 2) && (size == 0)) ||
-            ((undefreq & 4) && u)) {
+        if ((undefreq & (1 << size)) ||
+            ((undefreq & 8) && u)) {
             return 1;
         }
         if ((src1_wide && (rn & 1)) ||
@@ -5947,6 +5994,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
             return 1;
         }
 
+        /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+         * outside the loop below as it only performs a single pass.
+         */
+        if (op == 14 && size == 2) {
+            TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
+
+            if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+                return 1;
+            }
+            tcg_rn = tcg_temp_new_i64();
+            tcg_rm = tcg_temp_new_i64();
+            tcg_rd = tcg_temp_new_i64();
+            neon_load_reg64(tcg_rn, rn);
+            neon_load_reg64(tcg_rm, rm);
+            gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
+            neon_store_reg64(tcg_rd, rd);
+            gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
+            neon_store_reg64(tcg_rd, rd + 1);
+            tcg_temp_free_i64(tcg_rn);
+            tcg_temp_free_i64(tcg_rm);
+            tcg_temp_free_i64(tcg_rd);
+            return 0;
+        }
+
        /* Avoid overlapping operands.  Wide source operands are
           always aligned so will never overlap with wide
           destinations in problematic ways.  */
@@ -6486,6 +6557,41 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp3);
                 break;
+            case NEON_2RM_SHA1H:
+                if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
+                    || ((rm | rd) & 1)) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rm);
+
+                gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+
+                tcg_temp_free_i32(tmp);
+                tcg_temp_free_i32(tmp2);
+                break;
+            case NEON_2RM_SHA1SU1:
+                if ((rm | rd) & 1) {
+                    return 1;
+                }
+                /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
+                if (q) {
+                    if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
+                        return 1;
+                    }
+                } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+                    return 1;
+                }
+                tmp = tcg_const_i32(rd);
+                tmp2 = tcg_const_i32(rm);
+                if (q) {
+                    gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+                } else {
+                    gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+                }
+                tcg_temp_free_i32(tmp);
+                tcg_temp_free_i32(tmp2);
+                break;
             default:
             elementwise:
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
@@ -7698,6 +7804,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             tmp = load_reg(s, rn);
             tmp2 = load_reg(s, rm);
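+            /* The CRC helpers CRC only the low '1 << op1' bytes but expect
+             * the rest of the operand to be zeroed, so mask the byte and
+             * halfword variants.
+             */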
+            if (op1 == 0) {
+                tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+            } else if (op1 == 1) {
+                tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+            }
             tmp3 = tcg_const_i32(1 << op1);
             if (c & 0x2) {
                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
@@ -9330,6 +9441,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                 }
                 tmp2 = load_reg(s, rm);
+                if (sz == 0) {
+                    tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+                } else if (sz == 1) {
+                    tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+                }
                 tmp3 = tcg_const_i32(1 << sz);
                 if (c) {
                     gen_helper_crc32c(tmp, tmp, tmp2, tmp3);