Commit 5e16a627, authored by Phoebe Chen, committed by Hugo Landau

riscv: Provide vector crypto implementation of AES-CBC mode.

To accelerate AES-128/192/256-CBC block cipher encryption, we use the
vaesz, vaesem and vaesef instructions, which implement the initial,
middle and final rounds of AES encryption, respectively.

Similarly, to optimize AES-128/192/256-CBC block cipher decryption, we
use the vaesz, vaesdm and vaesdf instructions, which implement the
initial, middle and final rounds of AES decryption, respectively.

Furthermore, we optimize the key and initialization vector (IV) handling
by keeping the round keys and the chaining value in vector registers
across blocks.
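
For reference, the classic CBC encryption dataflow that these routines
implement is sketched below in C (conceptual only; xor_block() and
aes_encrypt_block() are illustrative helpers, not real functions):

    /* One 16-byte block per iteration; iv starts as the caller's IV. */
    for (size_t i = 0; i < length; i += 16) {
        xor_block(state, in + i, iv);         /* chain with previous ciphertext */
        aes_encrypt_block(state, round_keys); /* vaesz, (Nr-1) x vaesem, vaesef */
        memcpy(out + i, state, 16);
        iv = out + i;                         /* ciphertext feeds the next block */
    }
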
Signed-off-by: Phoebe Chen <phoebe.chen@sifive.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Hugo Landau <hlandau@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21923)
Parent 3645eb0b
@@ -11,6 +11,7 @@
 # or
 #
 # Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+# Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -35,8 +36,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # - RV64I
-# - RISC-V vector ('V') with VLEN >= 128
-# - RISC-V vector crypto AES extension ('Zvkned')
+# - RISC-V Vector ('V') with VLEN >= 128
+# - RISC-V Vector AES block cipher extension ('Zvkned')
use strict;
use warnings;
@@ -57,6 +58,533 @@ my $code=<<___;
.text
___
{
###############################################################################
# void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc);
my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
$v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
$v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
$v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
) = map("v$_",(0..31));
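# Register usage in the CBC routines below:
#   v1-v15  expanded round keys (11/13/15 of them for AES-128/192/256)
#   v16,v17 IV / chaining state and per-block scratch
#   v24     the 128-bit block being encrypted or decrypted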
# Load all 11 round keys to v1-v11 registers.
sub aes_128_load_key {
my $code=<<___;
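# Configure the vector unit for 4 x 32-bit elements, i.e. one 128-bit
# AES block (or round key) per vector register.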
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $v1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, $KEYP]}
___
return $code;
}
# Load all 13 round keys to v1-v13 registers.
sub aes_192_load_key {
my $code=<<___;
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $v1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v12, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v13, $KEYP]}
___
return $code;
}
# Load all 15 round keys to v1-v15 registers.
sub aes_256_load_key {
my $code=<<___;
@{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
@{[vle32_v $v1, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v2, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v3, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v4, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v5, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v6, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v7, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v8, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v9, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v10, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v11, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v12, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v13, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v14, $KEYP]}
addi $KEYP, $KEYP, 16
@{[vle32_v $v15, $KEYP]}
___
return $code;
}
# aes-128 encryption with round keys v1-v11
sub aes_128_encrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
@{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesem_vs $v24, $v4]} # with round key w[12,15]
@{[vaesem_vs $v24, $v5]} # with round key w[16,19]
@{[vaesem_vs $v24, $v6]} # with round key w[20,23]
@{[vaesem_vs $v24, $v7]} # with round key w[24,27]
@{[vaesem_vs $v24, $v8]} # with round key w[28,31]
@{[vaesem_vs $v24, $v9]} # with round key w[32,35]
@{[vaesem_vs $v24, $v10]} # with round key w[36,39]
@{[vaesef_vs $v24, $v11]} # with round key w[40,43]
___
return $code;
}
# aes-128 decryption with round keys v1-v11
sub aes_128_decrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v11]} # with round key w[40,43]
@{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
@{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
@{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
@{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
@{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
@{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
@{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
@{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
___
return $code;
}
# aes-192 encryption with round keys v1-v13
sub aes_192_encrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
@{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesem_vs $v24, $v4]} # with round key w[12,15]
@{[vaesem_vs $v24, $v5]} # with round key w[16,19]
@{[vaesem_vs $v24, $v6]} # with round key w[20,23]
@{[vaesem_vs $v24, $v7]} # with round key w[24,27]
@{[vaesem_vs $v24, $v8]} # with round key w[28,31]
@{[vaesem_vs $v24, $v9]} # with round key w[32,35]
@{[vaesem_vs $v24, $v10]} # with round key w[36,39]
@{[vaesem_vs $v24, $v11]} # with round key w[40,43]
@{[vaesem_vs $v24, $v12]} # with round key w[44,47]
@{[vaesef_vs $v24, $v13]} # with round key w[48,51]
___
return $code;
}
# aes-192 decryption with round keys v1-v13
sub aes_192_decrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v13]} # with round key w[48,51]
@{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
@{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
@{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
@{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
@{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
@{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
@{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
@{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
@{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
@{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
___
return $code;
}
# aes-256 encryption with round keys v1-v15
sub aes_256_encrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v1]} # with round key w[ 0, 3]
@{[vaesem_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesem_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesem_vs $v24, $v4]} # with round key w[12,15]
@{[vaesem_vs $v24, $v5]} # with round key w[16,19]
@{[vaesem_vs $v24, $v6]} # with round key w[20,23]
@{[vaesem_vs $v24, $v7]} # with round key w[24,27]
@{[vaesem_vs $v24, $v8]} # with round key w[28,31]
@{[vaesem_vs $v24, $v9]} # with round key w[32,35]
@{[vaesem_vs $v24, $v10]} # with round key w[36,39]
@{[vaesem_vs $v24, $v11]} # with round key w[40,43]
@{[vaesem_vs $v24, $v12]} # with round key w[44,47]
@{[vaesem_vs $v24, $v13]} # with round key w[48,51]
@{[vaesem_vs $v24, $v14]} # with round key w[52,55]
@{[vaesef_vs $v24, $v15]} # with round key w[56,59]
___
return $code;
}
# aes-256 decryption with round keys v1-v15
sub aes_256_decrypt {
my $code=<<___;
@{[vaesz_vs $v24, $v15]} # with round key w[56,59]
@{[vaesdm_vs $v24, $v14]} # with round key w[52,55]
@{[vaesdm_vs $v24, $v13]} # with round key w[48,51]
@{[vaesdm_vs $v24, $v12]} # with round key w[44,47]
@{[vaesdm_vs $v24, $v11]} # with round key w[40,43]
@{[vaesdm_vs $v24, $v10]} # with round key w[36,39]
@{[vaesdm_vs $v24, $v9]} # with round key w[32,35]
@{[vaesdm_vs $v24, $v8]} # with round key w[28,31]
@{[vaesdm_vs $v24, $v7]} # with round key w[24,27]
@{[vaesdm_vs $v24, $v6]} # with round key w[20,23]
@{[vaesdm_vs $v24, $v5]} # with round key w[16,19]
@{[vaesdm_vs $v24, $v4]} # with round key w[12,15]
@{[vaesdm_vs $v24, $v3]} # with round key w[ 8,11]
@{[vaesdm_vs $v24, $v2]} # with round key w[ 4, 7]
@{[vaesdf_vs $v24, $v1]} # with round key w[ 0, 3]
___
return $code;
}
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_cbc_encrypt
.type rv64i_zvkned_cbc_encrypt,\@function
rv64i_zvkned_cbc_encrypt:
# check whether the length is a multiple of 16 and >= 16
li $T1, 16
blt $LEN, $T1, L_end
andi $T1, $LEN, 15
bnez $T1, L_end
# Load number of rounds
lwu $rounds, 240($KEYP)
# Get proper routine for key size
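# (10, 12 or 14 rounds imply AES-128, AES-192 or AES-256, respectively.)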
li $T0, 10
beq $rounds, $T0, L_cbc_enc_128
li $T0, 12
beq $rounds, $T0, L_cbc_enc_192
li $T0, 14
beq $rounds, $T0, L_cbc_enc_256
ret
.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt
___
$code .= <<___;
.p2align 3
L_cbc_enc_128:
# Load all 11 round keys to v1-v11 registers.
@{[aes_128_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vxor_vv $v24, $v24, $v16]}
j 2f
1:
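# The previous ciphertext block is still in v24, so chaining only needs
# an XOR with the next plaintext block before the cipher rounds.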
@{[vle32_v $v17, ($INP)]}
@{[vxor_vv $v24, $v24, $v17]}
2:
# AES body
@{[aes_128_encrypt]}
@{[vse32_v $v24, ($OUTP)]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
addi $LEN, $LEN, -16
bnez $LEN, 1b
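# Store the last ciphertext block back to ivec so that a subsequent call
# can continue the chain.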
@{[vse32_v $v24, ($IVP)]}
ret
.size L_cbc_enc_128,.-L_cbc_enc_128
___
$code .= <<___;
.p2align 3
L_cbc_enc_192:
# Load all 13 round keys to v1-v13 registers.
@{[aes_192_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vxor_vv $v24, $v24, $v16]}
j 2f
1:
@{[vle32_v $v17, ($INP)]}
@{[vxor_vv $v24, $v24, $v17]}
2:
# AES body
@{[aes_192_encrypt]}
@{[vse32_v $v24, ($OUTP)]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
addi $LEN, $LEN, -16
bnez $LEN, 1b
@{[vse32_v $v24, ($IVP)]}
ret
.size L_cbc_enc_192,.-L_cbc_enc_192
___
$code .= <<___;
.p2align 3
L_cbc_enc_256:
# Load all 15 round keys to v1-v15 registers.
@{[aes_256_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vxor_vv $v24, $v24, $v16]}
j 2f
1:
@{[vle32_v $v17, ($INP)]}
@{[vxor_vv $v24, $v24, $v17]}
2:
# AES body
@{[aes_256_encrypt]}
@{[vse32_v $v24, ($OUTP)]}
addi $INP, $INP, 16
addi $OUTP, $OUTP, 16
addi $LEN, $LEN, -16
bnez $LEN, 1b
@{[vse32_v $v24, ($IVP)]}
ret
.size L_cbc_enc_256,.-L_cbc_enc_256
___
###############################################################################
# void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivec, const int enc);
my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5");
my ($T0, $T1, $rounds) = ("t0", "t1", "t2");
my ($v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7,
$v8, $v9, $v10, $v11, $v12, $v13, $v14, $v15,
$v16, $v17, $v18, $v19, $v20, $v21, $v22, $v23,
$v24, $v25, $v26, $v27, $v28, $v29, $v30, $v31,
) = map("v$_",(0..31));
$code .= <<___;
.p2align 3
.globl rv64i_zvkned_cbc_decrypt
.type rv64i_zvkned_cbc_decrypt,\@function
rv64i_zvkned_cbc_decrypt:
# check whether the length is a multiple of 16 and >= 16
li $T1, 16
blt $LEN, $T1, L_end
andi $T1, $LEN, 15
bnez $T1, L_end
# Load number of rounds
lwu $rounds, 240($KEYP)
# Get proper routine for key size
li $T0, 10
beq $rounds, $T0, L_cbc_dec_128
li $T0, 12
beq $rounds, $T0, L_cbc_dec_192
li $T0, 14
beq $rounds, $T0, L_cbc_dec_256
ret
.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt
___
$code .= <<___;
.p2align 3
L_cbc_dec_128:
# Load all 11 round keys to v1-v11 registers.
@{[aes_128_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
j 2f
1:
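# v16 holds the previous ciphertext block (initially the IV); keep a
# copy of the current ciphertext in v17 so it becomes the chaining
# value once v24 has been decrypted in place.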
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_128_decrypt]}
@{[vxor_vv $v24, $v24, $v16]}
@{[vse32_v $v24, ($OUTP)]}
@{[vmv_v_v $v16, $v17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
@{[vse32_v $v16, ($IVP)]}
ret
.size L_cbc_dec_128,.-L_cbc_dec_128
___
$code .= <<___;
.p2align 3
L_cbc_dec_192:
# Load all 13 round keys to v1-v13 registers.
@{[aes_192_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
j 2f
1:
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_192_decrypt]}
@{[vxor_vv $v24, $v24, $v16]}
@{[vse32_v $v24, ($OUTP)]}
@{[vmv_v_v $v16, $v17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
@{[vse32_v $v16, ($IVP)]}
ret
.size L_cbc_dec_192,.-L_cbc_dec_192
___
$code .= <<___;
.p2align 3
L_cbc_dec_256:
# Load all 15 round keys to v1-v15 registers.
@{[aes_256_load_key]}
# Load IV.
@{[vle32_v $v16, ($IVP)]}
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
j 2f
1:
@{[vle32_v $v24, ($INP)]}
@{[vmv_v_v $v17, $v24]}
addi $OUTP, $OUTP, 16
2:
# AES body
@{[aes_256_decrypt]}
@{[vxor_vv $v24, $v24, $v16]}
@{[vse32_v $v24, ($OUTP)]}
@{[vmv_v_v $v16, $v17]}
addi $LEN, $LEN, -16
addi $INP, $INP, 16
bnez $LEN, 1b
@{[vse32_v $v16, ($IVP)]}
ret
.size L_cbc_dec_256,.-L_cbc_dec_256
___
}
################################################################################
# int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits,
# AES_KEY *key)
@@ -627,6 +1155,10 @@ L_fail_m2:
 li a0, -2
 ret
 .size L_fail_m2,.-L_fail_m2
+
+L_end:
+ ret
+.size L_end,.-L_end
___
print $code;