未验证 提交 e56b35ca 作者: Fan Yang 提交者: GitHub

Support PolynomialMultiplyWideningLower and PolynomialMultiplyWideningUpper (#48525)

上级 875c0d90
......@@ -8185,7 +8185,7 @@ parse_cpu_features (const gchar *attr)
else if (!strcmp (attr + prefix, "crc"))
feature = MONO_CPU_ARM64_CRC;
else if (!strcmp (attr + prefix, "simd"))
feature = MONO_CPU_ARM64_ADVSIMD;
feature = MONO_CPU_ARM64_NEON;
#elif defined(TARGET_WASM)
if (!strcmp (attr + prefix, "simd"))
feature = MONO_CPU_WASM_SIMD;
......
......@@ -277,6 +277,7 @@ INTRINS(AARCH64_SHA256SU0, aarch64_crypto_sha256su0)
INTRINS(AARCH64_SHA256SU1, aarch64_crypto_sha256su1)
INTRINS(AARCH64_SHA256H, aarch64_crypto_sha256h)
INTRINS(AARCH64_SHA256H2, aarch64_crypto_sha256h2)
INTRINS(AARCH64_PMULL64, aarch64_neon_pmull64)
INTRINS_OVR(AARCH64_ADV_SIMD_ABS_FLOAT, fabs, sse_r4_t)
INTRINS_OVR(AARCH64_ADV_SIMD_ABS_DOUBLE, fabs, sse_r8_t)
INTRINS_OVR(AARCH64_ADV_SIMD_ABS_INT8, aarch64_neon_abs, sse_i1_t)
......
......@@ -9057,7 +9057,8 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
case OP_XOP_I4_I4_I4:
case OP_XOP_I4_I4_I8: {
IntrinsicId id = (IntrinsicId)0;
gboolean zext_last = FALSE;
gboolean zext_last = FALSE, bitcast_result = FALSE, getElement = FALSE;
int element_idx = -1;
switch (ins->inst_c0) {
case SIMD_OP_ARM64_CRC32B: id = INTRINS_AARCH64_CRC32B; zext_last = TRUE; break;
case SIMD_OP_ARM64_CRC32H: id = INTRINS_AARCH64_CRC32H; zext_last = TRUE; break;
......@@ -9079,32 +9080,50 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LT_DOUBLE; break;
case SIMD_OP_ARM64_FABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_FLOAT; break;
case SIMD_OP_ARM64_DABSOLUTE_COMPARE_LESS_THAN_OR_EQUAL: id = INTRINS_AARCH64_ADV_SIMD_ABS_COMPARE_LTE_DOUBLE; break;
case SIMD_OP_ARM64_PMULL64_LOWER:
id = INTRINS_AARCH64_PMULL64;
getElement = TRUE;
element_idx = 0;
bitcast_result = TRUE;
break;
case SIMD_OP_ARM64_PMULL64_UPPER:
id = INTRINS_AARCH64_PMULL64;
getElement = TRUE;
element_idx = 1;
bitcast_result = TRUE;
break;
default: g_assert_not_reached (); break;
}
LLVMValueRef arg1 = rhs;
if (zext_last)
arg1 = LLVMBuildZExt (ctx->builder, arg1, LLVMInt32Type (), "");
LLVMValueRef args [] = { lhs, arg1 };
if (getElement) {
args [0] = LLVMBuildExtractElement (ctx->builder, args [0], const_int32 (element_idx), "");
args [1] = LLVMBuildExtractElement (ctx->builder, args [1], const_int32 (element_idx), "");
}
values [ins->dreg] = call_intrins (ctx, id, args, "");
if (bitcast_result)
values [ins->dreg] = convert (ctx, values [ins->dreg], LLVMVectorType (LLVMInt64Type (), 2));
break;
}
case OP_XOP_X_X_X_X: {
IntrinsicId id = (IntrinsicId)0;
gboolean getLowerElement = FALSE;
int idx = -1;
int arg_idx = -1;
switch (ins->inst_c0) {
case SIMD_OP_ARM64_SHA1SU0: id = INTRINS_AARCH64_SHA1SU0; break;
case SIMD_OP_ARM64_SHA256H: id = INTRINS_AARCH64_SHA256H; break;
case SIMD_OP_ARM64_SHA256H2: id = INTRINS_AARCH64_SHA256H2; break;
case SIMD_OP_ARM64_SHA256SU1: id = INTRINS_AARCH64_SHA256SU1; break;
case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; idx = 1; break;
case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; idx = 1; break;
case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; idx = 1; break;
case SIMD_OP_ARM64_SHA1C: id = INTRINS_AARCH64_SHA1C; getLowerElement = TRUE; arg_idx = 1; break;
case SIMD_OP_ARM64_SHA1M: id = INTRINS_AARCH64_SHA1M; getLowerElement = TRUE; arg_idx = 1; break;
case SIMD_OP_ARM64_SHA1P: id = INTRINS_AARCH64_SHA1P; getLowerElement = TRUE; arg_idx = 1; break;
default: g_assert_not_reached (); break;
}
LLVMValueRef args [] = { lhs, rhs, arg3 };
if (getLowerElement)
args [idx] = LLVMBuildExtractElement (ctx->builder, args [idx], const_int32 (0), "");
args [arg_idx] = LLVMBuildExtractElement (ctx->builder, args [arg_idx], const_int32 (0), "");
values [ins->dreg] = call_intrins (ctx, id, args, "");
break;
}
......@@ -11888,7 +11907,7 @@ MonoCPUFeatures mono_llvm_get_cpu_features (void)
#if defined(TARGET_ARM64)
{ "crc", MONO_CPU_ARM64_CRC },
{ "crypto", MONO_CPU_ARM64_CRYPTO },
{ "neon", MONO_CPU_ARM64_ADVSIMD }
{ "neon", MONO_CPU_ARM64_NEON }
#endif
#if defined(TARGET_WASM)
{ "simd", MONO_CPU_WASM_SIMD },
......
......@@ -2852,7 +2852,7 @@ typedef enum {
MONO_CPU_ARM64_BASE = 1 << 1,
MONO_CPU_ARM64_CRC = 1 << 2,
MONO_CPU_ARM64_CRYPTO = 1 << 3,
MONO_CPU_ARM64_ADVSIMD = 1 << 4,
MONO_CPU_ARM64_NEON = 1 << 4,
#endif
} MonoCPUFeatures;
......@@ -2997,7 +2997,9 @@ typedef enum {
SIMD_OP_ARM64_SHA256H,
SIMD_OP_ARM64_SHA256H2,
SIMD_OP_ARM64_SHA256SU0,
SIMD_OP_ARM64_SHA256SU1
SIMD_OP_ARM64_SHA256SU1,
SIMD_OP_ARM64_PMULL64_LOWER,
SIMD_OP_ARM64_PMULL64_UPPER
} SimdOp;
const char *mono_arch_xregname (int reg);
......
......@@ -841,6 +841,11 @@ static SimdIntrinsic crypto_aes_methods [] = {
{SN_get_IsSupported}
};
/*
 * Aes class intrinsics that are gated on MONO_CPU_ARM64_NEON rather than
 * MONO_CPU_ARM64_CRYPTO (the feature used for crypto_aes_methods).
 *
 * emit_arm64_intrinsics () selects this table only when the class is "Aes"
 * and cmethod->name is exactly "PolynomialMultiplyWideningLower" or
 * "PolynomialMultiplyWideningUpper"; that name check is hard-coded there, so
 * adding an entry here also requires extending that condition.
 *
 * Both entries use OP_XOP_X_X_X and differ only in the SIMD_OP_ARM64_PMULL64_*
 * sub-op. In the LLVM backend (process_bb) both sub-ops map to
 * INTRINS_AARCH64_PMULL64, called on element 0 (LOWER) or element 1 (UPPER)
 * extracted from each operand, with the result converted to a 2 x i64 vector.
 *
 * NOTE(review): the neighbouring tables appear to list methods in alphabetical
 * order; presumably the lookup relies on that ordering -- confirm before
 * inserting new entries.
 */
static SimdIntrinsic neon_aes_methods [] = {
/* pmull64 on element 0 of each operand */
{SN_PolynomialMultiplyWideningLower, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_LOWER},
/* pmull64 on element 1 of each operand */
{SN_PolynomialMultiplyWideningUpper, OP_XOP_X_X_X, SIMD_OP_ARM64_PMULL64_UPPER}
};
static SimdIntrinsic sha1_methods [] = {
{SN_FixedRotate, OP_XOP_X_X, SIMD_OP_ARM64_SHA1H},
{SN_HashUpdateChoose, OP_XOP_X_X_X_X, SIMD_OP_ARM64_SHA1C},
......@@ -978,7 +983,11 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur
intrinsics_size = sizeof (sha1_methods);
}
if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
if (is_hw_intrinsics_class (klass, "Aes", &is_64bit) && (!strcmp (cmethod->name, "PolynomialMultiplyWideningLower") || !strcmp (cmethod->name, "PolynomialMultiplyWideningUpper"))) {
feature = MONO_CPU_ARM64_NEON;
intrinsics = neon_aes_methods;
intrinsics_size = sizeof (neon_aes_methods);
} else if (is_hw_intrinsics_class (klass, "Aes", &is_64bit)) {
feature = MONO_CPU_ARM64_CRYPTO;
intrinsics = crypto_aes_methods;
intrinsics_size = sizeof (crypto_aes_methods);
......@@ -1026,7 +1035,7 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur
if (!info)
return NULL;
supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_ADVSIMD) != 0;
supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_NEON) != 0;
switch (info -> id) {
case SN_Abs: {
......
......@@ -225,6 +225,8 @@ METHOD(Encrypt)
METHOD(EncryptLast)
METHOD(InverseMixColumns)
METHOD(KeygenAssist)
METHOD(PolynomialMultiplyWideningLower)
METHOD(PolynomialMultiplyWideningUpper)
// Pclmulqdq
METHOD(CarrylessMultiply)
// ArmBase
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册