Commit 7721a304 authored by Peter Maydell

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140609-1' into staging

```
-------------------------------------------------------------
target-arm queue:
 * support -bios option in vexpress boards
 * register the Cortex-A57 impdef system registers
 * fix handling of UXN bit in ARMv8 page tables
 * complete support of crypto insns in A32/T32
 * implement CRC and crypto insns in A64
 * fix bugs in generic timer control register
-------------------------------------------------------------
```

# gpg: Signature made Mon 09 Jun 2014 16:08:26 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20140609-1:
  target-arm: Delete unused iwmmxt_msadb helper
  target-arm: Fix errors in writes to generic timer control registers
  target-arm: A64: Implement two-register SHA instructions
  target-arm: A64: Implement 3-register SHA instructions
  target-arm: A64: Implement AES instructions
  target-arm: A32/T32: Mask CRC value in calling code, not helper
  target-arm: A64: Implement CRC instructions
  target-arm: VFPv4 implies half-precision extension
  target-arm: Clean up handling of ARMv8 optional feature bits
  target-arm: Remove unnecessary setting of feature bits
  target-arm: arm_any_initfn() should never set ARM_FEATURE_AARCH64
  target-arm: A64: Use PMULL feature bit for PMULL
  target-arm: add support for v8 VMULL.P64 instruction
  target-arm: Allow 3reg_wide undefreq to encode more bad size options
  target-arm: add support for v8 SHA1 and SHA256 instructions
  target-arm: Correct handling of UXN bit in ARMv8 LPAE page tables
  target-arm: Prepare cpreg writefns/readfns for EL3/SecExt
  target-arm/cpu64.c: Actually register Cortex-A57 impdef registers
  vexpress: Add support for the -bios flag to provide firmware
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
@@ -28,6 +28,7 @@
#include "net/net.h"
#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "hw/loader.h"
#include "exec/address-spaces.h"
#include "sysemu/blockdev.h"
#include "hw/block/flash.h"
@@ -528,6 +529,18 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard,
daughterboard->init(daughterboard, machine->ram_size, machine->cpu_model,
pic);
/*
* If a bios file was provided, attempt to map it into memory
*/
if (bios_name) {
const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
if (!fn || load_image_targphys(fn, map[VE_NORFLASH0],
VEXPRESS_FLASH_SIZE) < 0) {
error_report("Could not load ROM image '%s'", bios_name);
exit(1);
}
}
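With this hunk, firmware can be supplied from the command line, e.g. `qemu-system-arm -M vexpress-a9 -bios firmware.bin` (vexpress-a9 is one of the Versatile Express machine names; the other vexpress variants behave the same way). The image is mapped at the NOR flash 0 base address, and the board refuses to start if the file is missing or does not fit in VEXPRESS_FLASH_SIZE.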
/* Motherboard peripherals: the wiring is the same but the
* addresses vary between the legacy and A-Series memory maps.
*/
@@ -468,6 +468,9 @@ static uint32_t get_elf_hwcap2(void)
uint32_t hwcaps = 0;
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
return hwcaps;
}
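These bits surface in the guest's ELF auxiliary vector, so a Linux binary run under qemu-arm can probe the new instructions exactly as it would on real hardware. A minimal sketch, assuming glibc's getauxval() and the HWCAP2_* names from the ARM kernel's <asm/hwcap.h>:

```
#include <stdio.h>
#include <sys/auxv.h>   /* getauxval(), AT_HWCAP2 */
#include <asm/hwcap.h>  /* HWCAP2_* (assumed: ARM Linux kernel headers) */

int main(void)
{
    unsigned long hwcap2 = getauxval(AT_HWCAP2);

    /* Each bit mirrors an ARM_HWCAP2_ARM_* flag set in get_elf_hwcap2() */
    printf("aes:%d pmull:%d sha1:%d sha2:%d crc32:%d\n",
           !!(hwcap2 & HWCAP2_AES), !!(hwcap2 & HWCAP2_PMULL),
           !!(hwcap2 & HWCAP2_SHA1), !!(hwcap2 & HWCAP2_SHA2),
           !!(hwcap2 & HWCAP2_CRC32));
    return 0;
}
```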
@@ -536,7 +539,11 @@ static uint32_t get_elf_hwcap(void)
/* probe for the extra features */
#define GET_FEATURE(feat, hwcap) \
do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_PMULL);
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
#undef GET_FEATURE
return hwcaps;
@@ -316,7 +316,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_V7);
set_feature(env, ARM_FEATURE_ARM_DIV);
set_feature(env, ARM_FEATURE_LPAE);
set_feature(env, ARM_FEATURE_V8_AES);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
@@ -349,6 +348,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (arm_feature(env, ARM_FEATURE_VFP4)) {
set_feature(env, ARM_FEATURE_VFP3);
set_feature(env, ARM_FEATURE_VFP_FP16);
}
if (arm_feature(env, ARM_FEATURE_VFP3)) {
set_feature(env, ARM_FEATURE_VFP);
@@ -745,7 +745,6 @@ static void cortex_a15_initfn(Object *obj)
cpu->dtb_compatible = "arm,cortex-a15";
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
@@ -954,15 +953,13 @@ static void arm_any_initfn(Object *obj)
ARMCPU *cpu = ARM_CPU(obj);
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
set_feature(&cpu->env, ARM_FEATURE_V7MP);
set_feature(&cpu->env, ARM_FEATURE_V8_AES);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
#ifdef TARGET_AARCH64
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
#endif
cpu->midr = 0xffffffff;
}
#endif
@@ -635,6 +635,9 @@ enum arm_features {
ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
ARM_FEATURE_EL2, /* has EL2 Virtualization support */
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
@@ -93,11 +93,15 @@ static void aarch64_a57_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
set_feature(&cpu->env, ARM_FEATURE_V8_AES);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
cpu->midr = 0x411fd070;
cpu->reset_fpsid = 0x41034070;
@@ -128,6 +132,7 @@ static void aarch64_a57_initfn(Object *obj)
cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
cpu->dcz_blocksize = 4; /* 64 bytes */
define_arm_cp_regs(cpu, cortexa57_cp_reginfo);
}
#ifdef CONFIG_USER_ONLY
@@ -137,11 +142,13 @@ static void aarch64_any_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
set_feature(&cpu->env, ARM_FEATURE_V7MP);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_V8_AES);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
cpu->dcz_blocksize = 7; /* 512 bytes */
}
/*
* crypto_helper.c - emulate v8 Crypto Extensions instructions
*
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
* Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -15,9 +15,9 @@
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
union AES_STATE {
union CRYPTO_STATE {
uint8_t bytes[16];
uint32_t cols[4];
uint32_t words[4];
uint64_t l[2];
};
@@ -99,11 +99,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm,
/* ShiftRows permutation vector for decryption */
{ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 },
};
union AES_STATE rk = { .l = {
union CRYPTO_STATE rk = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
union AES_STATE st = { .l = {
union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
@@ -260,7 +260,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
} };
union AES_STATE st = { .l = {
union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
@@ -269,7 +269,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
assert(decrypt < 2);
for (i = 0; i < 16; i += 4) {
st.cols[i >> 2] = cpu_to_le32(
st.words[i >> 2] = cpu_to_le32(
mc[decrypt][st.bytes[i]] ^
rol32(mc[decrypt][st.bytes[i + 1]], 8) ^
rol32(mc[decrypt][st.bytes[i + 2]], 16) ^
@@ -279,3 +279,246 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
env->vfp.regs[rd] = make_float64(st.l[0]);
env->vfp.regs[rd + 1] = make_float64(st.l[1]);
}
/*
* SHA-1 logical functions
*/
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
return (x & (y ^ z)) ^ z;
}
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
return x ^ y ^ z;
}
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
return (x & y) | ((x | y) & z);
}
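cho() and maj() are the FIPS 180 Ch and Maj functions in operation-reduced form: `(x & (y ^ z)) ^ z` equals the textbook `(x & y) | (~x & z)`, and the maj() expression equals the bitwise majority. A throwaway standalone check of both identities (not part of the patch):

```
#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Both forms are pure bitwise functions, so checking every
     * combination of single bits proves equality for whole words. */
    for (uint32_t x = 0; x < 2; x++) {
        for (uint32_t y = 0; y < 2; y++) {
            for (uint32_t z = 0; z < 2; z++) {
                assert(((x & (y ^ z)) ^ z) == ((x & y) | (~x & z)));
                assert(((x & y) | ((x | y) & z)) ==
                       ((x & y) ^ (x & z) ^ (y & z)));
            }
        }
    }
    return 0;
}
```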
void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn,
uint32_t rm, uint32_t op)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE n = { .l = {
float64_val(env->vfp.regs[rn]),
float64_val(env->vfp.regs[rn + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
if (op == 3) { /* sha1su0 */
d.l[0] ^= d.l[1] ^ m.l[0];
d.l[1] ^= n.l[0] ^ m.l[1];
} else {
int i;
for (i = 0; i < 4; i++) {
uint32_t t;
switch (op) {
case 0: /* sha1c */
t = cho(d.words[1], d.words[2], d.words[3]);
break;
case 1: /* sha1p */
t = par(d.words[1], d.words[2], d.words[3]);
break;
case 2: /* sha1m */
t = maj(d.words[1], d.words[2], d.words[3]);
break;
default:
g_assert_not_reached();
}
t += rol32(d.words[0], 5) + n.words[0] + m.words[i];
n.words[0] = d.words[3];
d.words[3] = d.words[2];
d.words[2] = ror32(d.words[1], 2);
d.words[1] = d.words[0];
d.words[0] = t;
}
}
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
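The op values map onto the round-dependent functions of FIPS 180 SHA-1: SHA1C uses Ch (rounds 0-19), SHA1P uses Parity (rounds 20-39 and 60-79) and SHA1M uses Maj (rounds 40-59). Each helper call runs the four rounds that a single instruction covers, consuming the 4-word block in m as that slice of the message schedule.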
void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
m.words[0] = ror32(m.words[0], 2);
m.words[1] = m.words[2] = m.words[3] = 0;
env->vfp.regs[rd] = make_float64(m.l[0]);
env->vfp.regs[rd + 1] = make_float64(m.l[1]);
}
void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
d.words[0] = rol32(d.words[0] ^ m.words[1], 1);
d.words[1] = rol32(d.words[1] ^ m.words[2], 1);
d.words[2] = rol32(d.words[2] ^ m.words[3], 1);
d.words[3] = rol32(d.words[3] ^ d.words[0], 1);
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
/*
* The SHA-256 logical functions, according to
* http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
*/
static uint32_t S0(uint32_t x)
{
return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}
static uint32_t S1(uint32_t x)
{
return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
}
static uint32_t s0(uint32_t x)
{
return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}
static uint32_t s1(uint32_t x)
{
return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}
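These are the FIPS 180 SHA-256 functions: S0 and S1 are the compression-side Sigma0 (rotates 2, 13, 22) and Sigma1 (rotates 6, 11, 25), while s0 and s1 are the message-schedule sigma0 (rotates 7, 18, shift 3) and sigma1 (rotates 17, 19, shift 10).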
void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn,
uint32_t rm)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE n = { .l = {
float64_val(env->vfp.regs[rn]),
float64_val(env->vfp.regs[rn + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
int i;
for (i = 0; i < 4; i++) {
uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3]
+ S1(n.words[0]) + m.words[i];
n.words[3] = n.words[2];
n.words[2] = n.words[1];
n.words[1] = n.words[0];
n.words[0] = d.words[3] + t;
t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]);
d.words[3] = d.words[2];
d.words[2] = d.words[1];
d.words[1] = d.words[0];
d.words[0] = t;
}
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn,
uint32_t rm)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE n = { .l = {
float64_val(env->vfp.regs[rn]),
float64_val(env->vfp.regs[rn + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
int i;
for (i = 0; i < 4; i++) {
uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3]
+ S1(d.words[0]) + m.words[i];
d.words[3] = d.words[2];
d.words[2] = d.words[1];
d.words[1] = d.words[0];
d.words[0] = n.words[3 - i] + t;
}
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
d.words[0] += s0(d.words[1]);
d.words[1] += s0(d.words[2]);
d.words[2] += s0(d.words[3]);
d.words[3] += s0(m.words[0]);
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
uint32_t rm)
{
union CRYPTO_STATE d = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
union CRYPTO_STATE n = { .l = {
float64_val(env->vfp.regs[rn]),
float64_val(env->vfp.regs[rn + 1])
} };
union CRYPTO_STATE m = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
d.words[0] += s1(m.words[2]) + n.words[1];
d.words[1] += s1(m.words[3]) + n.words[2];
d.words[2] += s1(d.words[0]) + n.words[3];
d.words[3] += s1(d.words[1]) + m.words[0];
env->vfp.regs[rd] = make_float64(d.l[0]);
env->vfp.regs[rd + 1] = make_float64(d.l[1]);
}
@@ -24,6 +24,8 @@
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include <zlib.h> /* For crc32 */
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -186,36 +188,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}
/* Helper function for 64 bit polynomial multiply case:
* perform PolynomialMult(op1, op2) and return either the top or
* bottom half of the 128 bit result.
*/
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
for (bitnum = 0; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 << bitnum;
}
}
return res;
}
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
/* bit 0 of op1 can't influence the high 64 bits at all */
for (bitnum = 1; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 >> (64 - bitnum);
}
}
return res;
}
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
@@ -438,6 +410,34 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
return r;
}
/* 64-bit versions of the CRC helpers. Note that although the operation
* (and the prototypes of crc32c() and crc32()) mean that only the bottom
* 32 bits of the accumulator and result are used, we pass and return
* uint64_t for convenience of the generated code. Unlike the 32-bit
* instruction set versions, val may genuinely have 64 bits of data in it.
* The upper bytes of val (above the number specified by 'bytes') must have
* been zeroed out by the caller.
*/
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
uint8_t buf[8];
stq_le_p(buf, val);
/* zlib crc32 converts the accumulator and output to one's complement. */
return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}
uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
uint8_t buf[8];
stq_le_p(buf, val);
/* Linux crc32c converts the output to one's complement. */
return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
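Because the helper XORs the accumulator in and out of zlib's one's-complement convention, it effectively steps the raw CRC register, so chained helper calls agree with a single zlib crc32() pass over the concatenated bytes once the caller applies the usual init/final inversion. A standalone sketch of that property (assumes a little-endian host in place of stq_le_p(), zlib linked):

```
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <zlib.h>

/* Standalone mirror of the helper above, for illustration only */
static uint64_t crc32_64(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];
    memcpy(buf, &val, 8);   /* little-endian host assumed */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

int main(void)
{
    const uint8_t data[12] = "hello crc32";
    uint64_t lo = 0, hi = 0, acc;

    memcpy(&lo, data, 8);
    memcpy(&hi, data + 8, 4);   /* upper bytes stay zero, as required */

    /* Guest-style usage: raw accumulator, init/final XOR done in software */
    acc = crc32_64(0xffffffff, lo, 8);  /* CRC32X */
    acc = crc32_64(acc, hi, 4);         /* CRC32W */

    assert((uint32_t)(acc ^ 0xffffffff) == crc32(0, data, 12));
    return 0;
}
```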
/* Handle a CPU exception. */
void aarch64_cpu_do_interrupt(CPUState *cs)
{
@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@@ -46,3 +44,5 @@ DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
@@ -319,7 +319,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
env->cp15.c3 = value;
raw_write(env, ri, value);
tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */
}
@@ -327,12 +327,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
if (env->cp15.c13_fcse != value) {
if (raw_read(env, ri) != value) {
/* Unlike real hardware the qemu TLB uses virtual addresses,
* not modified virtual addresses, so this causes a TLB flush.
*/
tlb_flush(CPU(cpu), 1);
env->cp15.c13_fcse = value;
raw_write(env, ri, value);
}
}
@@ -341,7 +341,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
ARMCPU *cpu = arm_env_get_cpu(env);
if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU)
&& !extended_addresses_enabled(env)) {
/* For VMSA (when not using the LPAE long descriptor page table
* format) this register includes the ASID, so do a TLB flush.
@@ -349,7 +349,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
*/
tlb_flush(CPU(cpu), 1);
}
env->cp15.contextidr_el1 = value;
raw_write(env, ri, value);
}
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -693,7 +693,7 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
env->cp15.c0_cssel = value & 0xf;
raw_write(env, ri, value & 0xf);
}
static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1040,16 +1040,16 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx = ri->crm & 1;
uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
env->cp15.c14_timer[timeridx].ctl = value & 3;
env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value);
if ((oldval ^ value) & 1) {
/* Enable toggled */
gt_recalc_timer(cpu, timeridx);
} else if ((oldval & value) & 2) {
} else if ((oldval ^ value) & 2) {
/* IMASK toggled: don't need to recalculate,
* just set the interrupt line based on ISTATUS
*/
qemu_set_irq(cpu->gt_timer_outputs[timeridx],
(oldval & 4) && (value & 2));
(oldval & 4) && !(value & 2));
}
}
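CNT*_CTL keeps ENABLE in bit 0, IMASK in bit 1 and the read-only ISTATUS in bit 2, so the output line must be high exactly when ISTATUS is set and IMASK is clear. The old code tested `(oldval & value) & 2` (true whenever IMASK simply stayed set, not when it toggled) and asserted the line when the interrupt was masked; both senses are fixed above. A minimal standalone model of the corrected level (QEMU types and registration omitted):

```
#include <assert.h>
#include <stdint.h>

/* Generic timer CNT*_CTL: bit 0 ENABLE, bit 1 IMASK, bit 2 ISTATUS (RO) */
static int timer_irq_level(uint32_t ctl)
{
    return (ctl & 4) && !(ctl & 2);   /* pending and not masked */
}

int main(void)
{
    assert(timer_irq_level(0x5) == 1);  /* enabled, pending, unmasked */
    assert(timer_irq_level(0x7) == 0);  /* pending but IMASK set */
    assert(timer_irq_level(0x1) == 0);  /* enabled, nothing pending */
    return 0;
}
```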
@@ -1216,11 +1216,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
if (arm_feature(env, ARM_FEATURE_LPAE)) {
env->cp15.par_el1 = value;
raw_write(env, ri, value);
} else if (arm_feature(env, ARM_FEATURE_V7)) {
env->cp15.par_el1 = value & 0xfffff6ff;
raw_write(env, ri, value & 0xfffff6ff);
} else {
env->cp15.par_el1 = value & 0xfffff1ff;
raw_write(env, ri, value & 0xfffff1ff);
}
}
@@ -1423,7 +1423,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
* for long-descriptor tables the TTBCR fields are used differently
* and the c2_mask and c2_base_mask values are meaningless.
*/
env->cp15.c2_control = value;
raw_write(env, ri, value);
env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
}
@@ -1445,7 +1445,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
env->cp15.c2_base_mask = 0xffffc000u;
env->cp15.c2_control = 0;
raw_write(env, ri, 0);
env->cp15.c2_mask = 0;
}
@@ -1456,7 +1456,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
tlb_flush(CPU(cpu), 1);
env->cp15.c2_control = value;
raw_write(env, ri, value);
}
static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2151,14 +2151,14 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
ARMCPU *cpu = arm_env_get_cpu(env);
if (env->cp15.c1_sys == value) {
if (raw_read(env, ri) == value) {
/* Skip the TLB flush if nothing actually changed; Linux likes
* to do a lot of pointless SCTLR writes.
*/
return;
}
env->cp15.c1_sys = value;
raw_write(env, ri, value);
/* ??? Lots of these bits are not implemented. */
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(CPU(cpu), 1);
@@ -3929,13 +3929,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
page_size = (1 << ((granule_sz * (4 - level)) + 3));
descaddr |= (address & (page_size - 1));
/* Extract attributes from the descriptor and merge with table attrs */
if (arm_feature(env, ARM_FEATURE_V8)) {
attrs = extract64(descriptor, 2, 10)
| (extract64(descriptor, 53, 11) << 10);
} else {
attrs = extract64(descriptor, 2, 10)
| (extract64(descriptor, 52, 12) << 10);
}
attrs = extract64(descriptor, 2, 10)
| (extract64(descriptor, 52, 12) << 10);
attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
/* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3961,8 +3956,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
goto do_fault;
}
*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) {
/* XN or PXN */
if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) ||
(!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) ||
(!is_user && (attrs & (1 << 11)))) {
/* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally
* treat XN/UXN as UXN for v8.
*/
if (access_type == 2) {
goto do_fault;
}
@@ -5560,28 +5559,15 @@ int arm_rmode_to_sf(int rmode)
return rmode;
}
static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
{
memset(buf, 0, 4);
if (bytes == 1) {
buf[0] = val & 0xff;
} else if (bytes == 2) {
buf[0] = val & 0xff;
buf[1] = (val >> 8) & 0xff;
} else {
buf[0] = val & 0xff;
buf[1] = (val >> 8) & 0xff;
buf[2] = (val >> 16) & 0xff;
buf[3] = (val >> 24) & 0xff;
}
}
/* CRC helpers.
* The upper bytes of val (above the number specified by 'bytes') must have
* been zeroed out by the caller.
*/
uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
{
uint8_t buf[4];
crc_init_buffer(buf, val, bytes);
stl_le_p(buf, val);
/* zlib crc32 converts the accumulator and output to one's complement. */
return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
@@ -5591,7 +5577,7 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
{
uint8_t buf[4];
crc_init_buffer(buf, val, bytes);
stl_le_p(buf, val);
/* Linux crc32c converts the output to one's complement. */
return crc32c(acc, buf, bytes) ^ 0xffffffff;
@@ -456,8 +456,6 @@ DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
@@ -512,10 +510,22 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32)
DEF_HELPER_3(crypto_sha1h, void, env, i32, i32)
DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32)
DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32)
DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
@@ -369,15 +369,6 @@ IWMMXT_OP_AVGW(1)
#undef IWMMXT_OP_AVGW
#undef AVGW
uint64_t HELPER(iwmmxt_msadb)(uint64_t a, uint64_t b)
{
a = ((((a >> 0 ) & 0xffff) * ((b >> 0) & 0xffff) +
((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)) & 0xffffffff) |
((((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) +
((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)) << 32);
return a;
}
uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n)
{
a >>= n << 3;
@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
env->vfp.regs[rm] = make_float64(m0);
env->vfp.regs[rd] = make_float64(d0);
}
/* Helper function for 64 bit polynomial multiply case:
* perform PolynomialMult(op1, op2) and return either the top or
* bottom half of the 128 bit result.
*/
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
for (bitnum = 0; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 << bitnum;
}
}
return res;
}
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
int bitnum;
uint64_t res = 0;
/* bit 0 of op1 can't influence the high 64 bits at all */
for (bitnum = 1; bitnum < 64; bitnum++) {
if (op1 & (1ULL << bitnum)) {
res ^= op2 >> (64 - bitnum);
}
}
return res;
}
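Both helpers are plain shift-and-XOR loops, which makes them easy to sanity-check against a reference 128-bit carry-less product. A standalone sketch (helper bodies copied from above and renamed; assumes the GCC/Clang unsigned __int128 extension):

```
#include <assert.h>
#include <stdint.h>

static uint64_t pmull_lo(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    for (int bit = 0; bit < 64; bit++) {
        if (op1 & (1ULL << bit)) {
            res ^= op2 << bit;
        }
    }
    return res;
}

static uint64_t pmull_hi(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    for (int bit = 1; bit < 64; bit++) {   /* bit 0 can't reach the top half */
        if (op1 & (1ULL << bit)) {
            res ^= op2 >> (64 - bit);
        }
    }
    return res;
}

int main(void)
{
    /* In GF(2)[x], (x+1)*(x+1) = x^2 + 1: 3 "times" 3 is 5, not 9 */
    assert(pmull_lo(3, 3) == 5 && pmull_hi(3, 3) == 0);

    /* Cross-check both halves against a 128-bit shift-and-XOR product */
    uint64_t a = 0xdeadbeefcafef00dULL, b = 0x0123456789abcdefULL;
    unsigned __int128 r = 0;
    for (int i = 0; i < 64; i++) {
        if (a & (1ULL << i)) {
            r ^= (unsigned __int128)b << i;
        }
    }
    assert(pmull_lo(a, b) == (uint64_t)r);
    assert(pmull_hi(a, b) == (uint64_t)(r >> 64));
    return 0;
}
```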
@@ -85,6 +85,8 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
/* initialize TCG globals. */
void a64_translate_init(void)
@@ -3774,6 +3776,54 @@ static void handle_shift_reg(DisasContext *s,
tcg_temp_free_i64(tcg_shift);
}
/* CRC32[BHWX], CRC32C[BHWX] */
static void handle_crc32(DisasContext *s,
unsigned int sf, unsigned int sz, bool crc32c,
unsigned int rm, unsigned int rn, unsigned int rd)
{
TCGv_i64 tcg_acc, tcg_val;
TCGv_i32 tcg_bytes;
if (!arm_dc_feature(s, ARM_FEATURE_CRC)
|| (sf == 1 && sz != 3)
|| (sf == 0 && sz == 3)) {
unallocated_encoding(s);
return;
}
if (sz == 3) {
tcg_val = cpu_reg(s, rm);
} else {
uint64_t mask;
switch (sz) {
case 0:
mask = 0xFF;
break;
case 1:
mask = 0xFFFF;
break;
case 2:
mask = 0xFFFFFFFF;
break;
default:
g_assert_not_reached();
}
tcg_val = new_tmp_a64(s);
tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
}
tcg_acc = cpu_reg(s, rn);
tcg_bytes = tcg_const_i32(1 << sz);
if (crc32c) {
gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
} else {
gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
}
tcg_temp_free_i32(tcg_bytes);
}
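The sz field encodes the operand width as 1 << sz bytes, with sf == 1 legal only for the doubleword forms (CRC32X/CRC32CX); for the narrower widths the source register is masked here in the calling code rather than in the helper, the same convention the A32/T32 decoders adopt later in this series.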
/* C3.5.8 Data-processing (2 source)
* 31 30 29 28 21 20 16 15 10 9 5 4 0
* +----+---+---+-----------------+------+--------+------+------+
@@ -3821,8 +3871,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
case 21:
case 22:
case 23: /* CRC32 */
unsupported_encoding(s, insn);
{
int sz = extract32(opcode, 0, 2);
bool crc32c = extract32(opcode, 2, 1);
handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
break;
}
default:
unallocated_encoding(s);
break;
@@ -8574,7 +8628,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
return;
}
if (size == 3) {
if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
unallocated_encoding(s);
return;
}
@@ -10497,7 +10551,55 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
unsupported_encoding(s, insn);
int size = extract32(insn, 22, 2);
int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
int decrypt;
TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
CryptoThreeOpEnvFn *genfn;
if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
|| size != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
case 0x4: /* AESE */
decrypt = 0;
genfn = gen_helper_crypto_aese;
break;
case 0x6: /* AESMC */
decrypt = 0;
genfn = gen_helper_crypto_aesmc;
break;
case 0x5: /* AESD */
decrypt = 1;
genfn = gen_helper_crypto_aese;
break;
case 0x7: /* AESIMC */
decrypt = 1;
genfn = gen_helper_crypto_aesmc;
break;
default:
unallocated_encoding(s);
return;
}
/* Note that we convert the Vx register indexes into the
* index within the vfp.regs[] array, so we can share the
* helper with the AArch32 instructions.
*/
tcg_rd_regno = tcg_const_i32(rd << 1);
tcg_rn_regno = tcg_const_i32(rn << 1);
tcg_decrypt = tcg_const_i32(decrypt);
genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
tcg_temp_free_i32(tcg_rd_regno);
tcg_temp_free_i32(tcg_rn_regno);
tcg_temp_free_i32(tcg_decrypt);
}
/* C3.6.20 Crypto three-reg SHA
@@ -10508,7 +10610,64 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
unsupported_encoding(s, insn);
int size = extract32(insn, 22, 2);
int opcode = extract32(insn, 12, 3);
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
CryptoThreeOpEnvFn *genfn;
TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
int feature = ARM_FEATURE_V8_SHA256;
if (size != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
case 0: /* SHA1C */
case 1: /* SHA1P */
case 2: /* SHA1M */
case 3: /* SHA1SU0 */
genfn = NULL;
feature = ARM_FEATURE_V8_SHA1;
break;
case 4: /* SHA256H */
genfn = gen_helper_crypto_sha256h;
break;
case 5: /* SHA256H2 */
genfn = gen_helper_crypto_sha256h2;
break;
case 6: /* SHA256SU1 */
genfn = gen_helper_crypto_sha256su1;
break;
default:
unallocated_encoding(s);
return;
}
if (!arm_dc_feature(s, feature)) {
unallocated_encoding(s);
return;
}
tcg_rd_regno = tcg_const_i32(rd << 1);
tcg_rn_regno = tcg_const_i32(rn << 1);
tcg_rm_regno = tcg_const_i32(rm << 1);
if (genfn) {
genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
} else {
TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
tcg_rn_regno, tcg_rm_regno, tcg_opcode);
tcg_temp_free_i32(tcg_opcode);
}
tcg_temp_free_i32(tcg_rd_regno);
tcg_temp_free_i32(tcg_rn_regno);
tcg_temp_free_i32(tcg_rm_regno);
}
/* C3.6.21 Crypto two-reg SHA
......@@ -10519,7 +10678,49 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
unsupported_encoding(s, insn);
int size = extract32(insn, 22, 2);
int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
CryptoTwoOpEnvFn *genfn;
int feature;
TCGv_i32 tcg_rd_regno, tcg_rn_regno;
if (size != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
case 0: /* SHA1H */
feature = ARM_FEATURE_V8_SHA1;
genfn = gen_helper_crypto_sha1h;
break;
case 1: /* SHA1SU1 */
feature = ARM_FEATURE_V8_SHA1;
genfn = gen_helper_crypto_sha1su1;
break;
case 2: /* SHA256SU0 */
feature = ARM_FEATURE_V8_SHA256;
genfn = gen_helper_crypto_sha256su0;
break;
default:
unallocated_encoding(s);
return;
}
if (!arm_dc_feature(s, feature)) {
unallocated_encoding(s);
return;
}
tcg_rd_regno = tcg_const_i32(rd << 1);
tcg_rn_regno = tcg_const_i32(rn << 1);
genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
tcg_temp_free_i32(tcg_rd_regno);
tcg_temp_free_i32(tcg_rn_regno);
}
/* C3.6 Data processing - SIMD, inc Crypto
@@ -1382,8 +1382,6 @@ IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)
IWMMXT_OP(msadb)
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
@@ -4776,6 +4774,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
@@ -4809,6 +4808,7 @@ static const uint8_t neon_3r_sizes[] = {
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD] = 0x7,
[NEON_3R_SHA] = 0xf, /* size field encodes op type */
[NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
@@ -4842,6 +4842,7 @@ static const uint8_t neon_3r_sizes[] = {
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
@@ -4858,6 +4859,7 @@ static const uint8_t neon_3r_sizes[] = {
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
@@ -4918,6 +4920,7 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VCEQ0] = 0x7,
[NEON_2RM_VCLE0] = 0x7,
[NEON_2RM_VCLT0] = 0x7,
[NEON_2RM_SHA1H] = 0x4,
[NEON_2RM_VABS] = 0x7,
[NEON_2RM_VNEG] = 0x7,
[NEON_2RM_VCGT0_F] = 0x4,
@@ -4934,6 +4937,7 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VMOVN] = 0x7,
[NEON_2RM_VQMOVN] = 0x7,
[NEON_2RM_VSHLL] = 0x7,
[NEON_2RM_SHA1SU1] = 0x4,
[NEON_2RM_VRINTN] = 0x4,
[NEON_2RM_VRINTX] = 0x4,
[NEON_2RM_VRINTA] = 0x4,
@@ -5011,6 +5015,49 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
if (q && ((rd | rn | rm) & 1)) {
return 1;
}
/*
* The SHA-1/SHA-256 3-register instructions require special treatment
* here, as their size field is overloaded as an op type selector, and
* they all consume their input in a single pass.
*/
if (op == NEON_3R_SHA) {
if (!q) {
return 1;
}
if (!u) { /* SHA-1 */
if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
return 1;
}
tmp = tcg_const_i32(rd);
tmp2 = tcg_const_i32(rn);
tmp3 = tcg_const_i32(rm);
tmp4 = tcg_const_i32(size);
gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
tcg_temp_free_i32(tmp4);
} else { /* SHA-256 */
if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
return 1;
}
tmp = tcg_const_i32(rd);
tmp2 = tcg_const_i32(rn);
tmp3 = tcg_const_i32(rm);
switch (size) {
case 0:
gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
break;
case 1:
gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
break;
case 2:
gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
break;
}
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
return 0;
}
if (size == 3 && op != NEON_3R_LOGIC) {
/* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
@@ -5905,10 +5952,11 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
int src1_wide;
int src2_wide;
int prewiden;
/* undefreq: bit 0 : UNDEF if size != 0
* bit 1 : UNDEF if size == 0
* bit 2 : UNDEF if U == 1
* Note that [1:0] set implies 'always UNDEF'
/* undefreq: bit 0 : UNDEF if size == 0
* bit 1 : UNDEF if size == 1
* bit 2 : UNDEF if size == 2
* bit 3 : UNDEF if U == 1
* Note that [2:0] set implies 'always UNDEF'
*/
int undefreq;
/* prewiden, src1_wide, src2_wide, undefreq */
@@ -5922,13 +5970,13 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
{0, 1, 1, 0}, /* VSUBHN */
{0, 0, 0, 0}, /* VABDL */
{0, 0, 0, 0}, /* VMLAL */
{0, 0, 0, 6}, /* VQDMLAL */
{0, 0, 0, 9}, /* VQDMLAL */
{0, 0, 0, 0}, /* VMLSL */
{0, 0, 0, 6}, /* VQDMLSL */
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 2}, /* VQDMULL */
{0, 0, 0, 5}, /* Polynomial VMULL */
{0, 0, 0, 3}, /* Reserved: always UNDEF */
{0, 0, 0, 1}, /* VQDMULL */
{0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
prewiden = neon_3reg_wide[op][0];
@@ -5936,9 +5984,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
src2_wide = neon_3reg_wide[op][2];
undefreq = neon_3reg_wide[op][3];
if (((undefreq & 1) && (size != 0)) ||
((undefreq & 2) && (size == 0)) ||
((undefreq & 4) && u)) {
if ((undefreq & (1 << size)) ||
((undefreq & 8) && u)) {
return 1;
}
if ((src1_wide && (rn & 1)) ||
@@ -5947,6 +5994,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
return 1;
}
/* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
* outside the loop below as it only performs a single pass.
*/
if (op == 14 && size == 2) {
TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
return 1;
}
tcg_rn = tcg_temp_new_i64();
tcg_rm = tcg_temp_new_i64();
tcg_rd = tcg_temp_new_i64();
neon_load_reg64(tcg_rn, rn);
neon_load_reg64(tcg_rm, rm);
gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd);
gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd + 1);
tcg_temp_free_i64(tcg_rn);
tcg_temp_free_i64(tcg_rm);
tcg_temp_free_i64(tcg_rd);
return 0;
}
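The old three-bit undefreq could only express "UNDEF if size != 0" or "UNDEF if size == 0"; the reworked form gives each size its own bit, so Polynomial VMULL can leave size 2 (VMULL.P64) decodable while size 1 stays UNDEF. A standalone sketch of the new decode rule, using values from the table above:

```
#include <assert.h>
#include <stdbool.h>

/* Bits 0-2 flag "UNDEF for this size"; bit 3 flags "UNDEF if U == 1" */
static bool undef(int undefreq, int size, int u)
{
    return (undefreq & (1 << size)) || ((undefreq & 8) && u);
}

int main(void)
{
    /* Polynomial VMULL, undefreq = 0xa: size 1 and U == 1 are UNDEF,
     * leaving size 0 (VMULL.P8) and size 2 (VMULL.P64) decodable. */
    assert(!undef(0xa, 0, 0));   /* VMULL.P8  */
    assert(!undef(0xa, 2, 0));   /* VMULL.P64 */
    assert(undef(0xa, 1, 0));
    assert(undef(0xa, 0, 1));

    /* VQDMULL, undefreq = 1: only size 0 is UNDEF */
    assert(undef(1, 0, 0) && !undef(1, 1, 0) && !undef(1, 2, 0));
    return 0;
}
```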
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
@@ -6486,6 +6557,41 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
break;
case NEON_2RM_SHA1H:
if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
|| ((rm | rd) & 1)) {
return 1;
}
tmp = tcg_const_i32(rd);
tmp2 = tcg_const_i32(rm);
gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp2);
break;
case NEON_2RM_SHA1SU1:
if ((rm | rd) & 1) {
return 1;
}
/* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
if (q) {
if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
return 1;
}
} else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
return 1;
}
tmp = tcg_const_i32(rd);
tmp2 = tcg_const_i32(rm);
if (q) {
gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
} else {
gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(tmp2);
break;
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
@@ -7698,6 +7804,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
if (op1 == 0) {
tcg_gen_andi_i32(tmp2, tmp2, 0xff);
} else if (op1 == 1) {
tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
}
tmp3 = tcg_const_i32(1 << op1);
if (c & 0x2) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
@@ -9330,6 +9441,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
}
tmp2 = load_reg(s, rm);
if (sz == 0) {
tcg_gen_andi_i32(tmp2, tmp2, 0xff);
} else if (sz == 1) {
tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
}
tmp3 = tcg_const_i32(1 << sz);
if (c) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);