Commit 17073066 authored by Charalampos Mitrodimas, committed by Hugo Landau

riscv: sha256: Provide a Zvknha-based implementation

The upcoming RISC-V vector crypto extensions include the
Zvknha extension, which provides SHA-256-specific instructions.
This patch provides an implementation that uses this
extension when available.

Tested on QEMU with no regressions observed.
Signed-off-by: Charalampos Mitrodimas <charalampos.mitrodimas@vrull.eu>
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Hugo Landau <hlandau@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21923)
Parent 204a1c98
@@ -281,6 +281,22 @@ sub rev8 {
# Vector instructions
sub vadd_vv {
# vadd.vv vd, vs2, vs1
my $template = 0b0000001_00000_00000_000_00000_1010111;
my $vd = read_vreg shift;
my $vs2 = read_vreg shift;
my $vs1 = read_vreg shift;
return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7));
}
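# A hand-worked packing example (a sketch for illustration, not module
# output): with vd=v14, vs2=v15, vs1=v10 the call
#   vadd_vv("v14", "v15", "v10")
# ORs (15 << 20), (10 << 15) and (14 << 7) into the template and
# returns ".word 49612631" (0x02F50757), the encoding of
# "vadd.vv v14, v15, v10".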
sub vid_v {
# vid.v vd
my $template = 0b0101001_00000_10001_010_00000_1010111;
my $vd = read_vreg shift;
return ".word ".($template | ($vd << 7));
}
sub vle32_v {
# vle32.v vd, (rs1)
my $template = 0b0000001_00000_00000_110_00000_0000111;
@@ -297,6 +313,15 @@ sub vle64_v {
return ".word ".($template | ($rs1 << 15) | ($vd << 7));
}
sub vlse32_v {
# vlse32.v vd, (rs1), rs2
my $template = 0b0000101_00000_00000_110_00000_0000111;
my $vd = read_vreg shift;
my $rs1 = read_reg shift;
my $rs2 = read_reg shift;
return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7));
}
sub vlse64_v {
# vlse64.v vd, (rs1), rs2
my $template = 0b0000101_00000_00000_111_00000_0000111;
@@ -315,6 +340,24 @@ sub vmerge_vim {
return ".word ".($template | ($vs2 << 20) | ($imm << 15) | ($vd << 7));
}
sub vmerge_vvm {
# vmerge.vvm vd, vs2, vs1, v0
my $template = 0b0101110_00000_00000_000_00000_1010111;
my $vd = read_vreg shift;
my $vs2 = read_vreg shift;
my $vs1 = read_vreg shift;
return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7))
}
sub vmseq_vi {
# vmseq.vi vd, vs1, imm
my $template = 0b0110001_00000_00000_011_00000_1010111;
my $vd = read_vreg shift;
my $vs1 = read_vreg shift;
my $imm = shift;
return ".word ".($template | ($vs1 << 20) | ($imm << 15) | ($vd << 7))
}
sub vmv_v_i {
# vmv.v.i vd, imm
my $template = 0b0101111_00000_00000_011_00000_1010111;
@@ -411,6 +454,15 @@ sub vsrl_vx {
return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7));
}
sub vsse32_v {
# vsse32.v vs3, (rs1), rs2
my $template = 0b0000101_00000_00000_110_00000_0100111;
my $vs3 = read_vreg shift;
my $rs1 = read_reg shift;
my $rs2 = read_reg shift;
return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7));
}
sub vsse64_v {
# vsse64.v vs3, (rs1), rs2
my $template = 0b0000101_00000_00000_111_00000_0100111;
@@ -558,4 +610,33 @@ sub vaesz_vs {
return ".word ".($template | ($vs2 << 20) | ($vd << 7));
}
## Zvknha instructions
sub vsha2ms_vv {
# vsha2ms.vv vd, vs2, vs1
my $template = 0b1011011_00000_00000_010_00000_1110111;
my $vd = read_vreg shift;
my $vs2 = read_vreg shift;
my $vs1 = read_vreg shift;
return ".word ".($template | ($vs2 << 20)| ($vs1 << 15 )| ($vd << 7));
}
sub vsha2ch_vv {
# vsha2ch.vv vd, vs2, vs1
my $template = 0b1011101_00000_00000_010_00000_1110111;
my $vd = read_vreg shift;
my $vs2 = read_vreg shift;
my $vs1 = read_vreg shift;
return ".word ".($template | ($vs2 << 20)| ($vs1 << 15 )| ($vd << 7));
}
sub vsha2cl_vv {
# vsha2cl.vv vd, vs2, vs1
my $template = 0b1011111_00000_00000_010_00000_1110111;
my $vd = read_vreg shift;
my $vs2 = read_vreg shift;
my $vs1 = read_vreg shift;
return ".word ".($template | ($vs2 << 20)| ($vs1 << 15 )| ($vd << 7));
}
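# These helpers emit raw .word values so the generated file assembles
# even with toolchains that do not yet understand the Zvknha mnemonics.
# A hand-worked example (a sketch for illustration, not module output):
#   vsha2ms_vv("v10", "v14", "v13")
# returns ".word 3068568951" (0xB6E6A577), the encoding of
# "vsha2ms.vv v10, v14, v13".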
1;
#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V vector ('V') with VLEN >= 128
# - Vector Bit-manipulation used in Cryptography ('Zvbb')
# - Vector SHA-2 Secure Hash ('Zvknha')
use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$output and open STDOUT,">$output";
my $code=<<___;
.text
___
my ($V0, $V10, $V11, $V12, $V13, $V14, $V15, $V16, $V17) = ("v0", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17");
my ($V26, $V27) = ("v26", "v27");
my $K256 = "K256";
# Function arguments
my ($H, $INP, $LEN, $KT, $STRIDE) = ("a0", "a1", "a2", "a3", "t3");
################################################################################
# void sha256_block_data_order_zvbb_zvknha(void *c, const void *p, size_t len)
$code .= <<___;
.p2align 2
.globl sha256_block_data_order_zvbb_zvknha
.type sha256_block_data_order_zvbb_zvknha,\@function
sha256_block_data_order_zvbb_zvknha:
@{[vsetivli__x0_4_e32_m1_tu_mu]}
# H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c}
# We achieve this by reading with a negative stride followed by
# element sliding.
li $STRIDE, -4
addi $H, $H, 12
@{[vlse32_v $V16, $H, $STRIDE]} # {d,c,b,a}
addi $H, $H, 16
@{[vlse32_v $V17, $H, $STRIDE]} # {h,g,f,e}
# Keep H advanced by 12
addi $H, $H, -16
@{[vmv_v_v $V27, $V16]} # {d,c,b,a}
@{[vslidedown_vi $V26, $V16, 2]} # {b,a,0,0}
@{[vslidedown_vi $V16, $V17, 2]} # {f,e,0,0}
@{[vslideup_vi $V16, $V26, 2]} # {f,e,b,a}
@{[vslideup_vi $V17, $V27, 2]} # {h,g,d,c}
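# This gives the two state halves in the element order that the
# vsha2c[hl] instructions consume: {a,b,e,f} and {c,d,g,h}
# (written lowest element first above).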
# Keep the old state as we need it later: H' = H+{a',b',c',...,h'}.
@{[vmv_v_v $V26, $V16]}
@{[vmv_v_v $V27, $V17]}
L_round_loop:
la $KT, $K256 # Load round constants K256
# Load the 512-bit message block into v10-v13 and byte-swap
# each 4-byte element.
@{[vle32_v $V10, $INP]}
@{[vrev8_v $V10, $V10]}
add $INP, $INP, 16
@{[vle32_v $V11, $INP]}
@{[vrev8_v $V11, $V11]}
add $INP, $INP, 16
@{[vle32_v $V12, $INP]}
@{[vrev8_v $V12, $V12]}
add $INP, $INP, 16
@{[vle32_v $V13, $INP]}
@{[vrev8_v $V13, $V13]}
add $INP, $INP, 16
# Decrement length by 1
add $LEN, $LEN, -1
# Set v0 up for the vmerge that replaces the first word (idx==0)
@{[vid_v $V0]}
@{[vmseq_vi $V0, $V0, 0x0]} # v0.mask[i] = (i == 0 ? 1 : 0)
# Quad-round 0 (+0, Wt from oldest to newest in v10->v11->v12->v13)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V10]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V12, $V11, $V0]}
@{[vsha2ms_vv $V10, $V14, $V13]} # Generate W[19:16]
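# Quad-rounds 1-11 repeat this pattern with the roles of v10-v13
# rotated: load the next four round constants, add them to the oldest
# message words, run four compression rounds as a low/high pair
# (vsha2cl/vsha2ch), then let the masked vmerge assemble the
# {W[11],W[10],W[9],W[4]}-shaped operand that vsha2ms.vv consumes to
# produce the next four schedule words.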
# Quad-round 1 (+1, v11->v12->v13->v10)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V11]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V13, $V12, $V0]}
@{[vsha2ms_vv $V11, $V14, $V10]} # Generate W[23:20]
# Quad-round 2 (+2, v12->v13->v10->v11)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V12]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V10, $V13, $V0]}
@{[vsha2ms_vv $V12, $V14, $V11]} # Generate W[27:24]
# Quad-round 3 (+3, v13->v10->v11->v12)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V13]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V11, $V10, $V0]}
@{[vsha2ms_vv $V13, $V14, $V12]} # Generate W[31:28]
# Quad-round 4 (+0, v10->v11->v12->v13)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V10]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V12, $V11, $V0]}
@{[vsha2ms_vv $V10, $V14, $V13]} # Generate W[35:32]
# Quad-round 5 (+1, v11->v12->v13->v10)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V11]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V13, $V12, $V0]}
@{[vsha2ms_vv $V11, $V14, $V10]} # Generate W[39:36]
# Quad-round 6 (+2, v12->v13->v10->v11)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V12]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V10, $V13, $V0]}
@{[vsha2ms_vv $V12, $V14, $V11]} # Generate W[43:40]
# Quad-round 7 (+3, v13->v10->v11->v12)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V13]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V11, $V10, $V0]}
@{[vsha2ms_vv $V13, $V14, $V12]} # Generate W[47:44]
# Quad-round 8 (+0, v10->v11->v12->v13)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V10]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V12, $V11, $V0]}
@{[vsha2ms_vv $V10, $V14, $V13]} # Generate W[51:48]
# Quad-round 9 (+1, v11->v12->v13->v10)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V11]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V13, $V12, $V0]}
@{[vsha2ms_vv $V11, $V14, $V10]} # Generate W[55:52]
# Quad-round 10 (+2, v12->v13->v10->v11)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V12]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V10, $V13, $V0]}
@{[vsha2ms_vv $V12, $V14, $V11]} # Generate W[59:56]
# Quad-round 11 (+3, v13->v10->v11->v12)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V13]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
@{[vmerge_vvm $V14, $V11, $V10, $V0]}
@{[vsha2ms_vv $V13, $V14, $V12]} # Generate W[63:60]
# Quad-round 12 (+0, v10->v11->v12->v13)
# Note that we stop generating new message schedule words (Wt, v10-13)
# as we already generated all the words we end up consuming (i.e., W[63:60]).
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V10]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
# Quad-round 13 (+1, v11->v12->v13->v10)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V11]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
# Quad-round 14 (+2, v12->v13->v10->v11)
@{[vle32_v $V15, $KT]}
addi $KT, $KT, 16
@{[vadd_vv $V14, $V15, $V12]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
# Quad-round 15 (+3, v13->v10->v11->v12)
@{[vle32_v $V15, $KT]}
# No kt increment needed.
@{[vadd_vv $V14, $V15, $V13]}
@{[vsha2cl_vv $V17, $V16, $V14]}
@{[vsha2ch_vv $V16, $V17, $V14]}
# H' = H+{a',b',c',...,h'}
@{[vadd_vv $V16, $V26, $V16]}
@{[vadd_vv $V17, $V27, $V17]}
@{[vmv_v_v $V26, $V16]}
@{[vmv_v_v $V27, $V17]}
bnez $LEN, L_round_loop
# v26 = v16 = {f,e,b,a}
# v27 = v17 = {h,g,d,c}
# Undo the transformation performed on entry.
@{[vslideup_vi $V17, $V16, 2]} # {h,g,f,e}
@{[vslidedown_vi $V16, $V27, 2]} # {d,c,0,0}
@{[vslidedown_vi $V26, $V26, 2]} # {b,a,0,0}
@{[vslideup_vi $V16, $V26, 2]} # {d,c,b,a}
# H is already advanced by 12
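# Storing with the same negative stride reverses the element order
# again, so memory regains the natural {a,b,c,d},{e,f,g,h} layout.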
@{[vsse32_v $V16, $H, $STRIDE]} # {a,b,c,d}
addi $H, $H, 16
@{[vsse32_v $V17, $H, $STRIDE]} # {e,f,g,h}
ret
.size sha256_block_data_order_zvbb_zvknha,.-sha256_block_data_order_zvbb_zvknha
.p2align 2
.type $K256,\@object
$K256:
.word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size $K256,.-$K256
___
print $code;
close STDOUT or die "error closing STDOUT: $!";
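Given the argument handling above, the generator can be run standalone by passing just an output path; a minimal sketch (the output file name here is only an example):

perl sha256-riscv64-zvbb-zvknha.pl sha256-riscv64-zvbb-zvknha.S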
@@ -46,6 +46,9 @@ IF[{- !$disabled{asm} -}]
$SHA1ASM_c64xplus=sha1-c64xplus.s sha256-c64xplus.s sha512-c64xplus.s
$SHA1DEF_c64xplus=SHA1_ASM SHA256_ASM SHA512_ASM
$SHA1ASM_riscv64=sha_riscv.c sha256-riscv64-zvbb-zvknha.S
$SHA1DEF_riscv64=SHA256_ASM INCLUDE_C_SHA256
# Now that we have defined all the arch specific variables, use the
# appropriate one, and define the appropriate macros
IF[$SHA1ASM_{- $target{asm_arch} -}]
@@ -168,6 +171,8 @@ GENERATE[sha256-c64xplus.S]=asm/sha256-c64xplus.pl
GENERATE[sha512-c64xplus.S]=asm/sha512-c64xplus.pl
GENERATE[keccak1600-c64x.S]=asm/keccak1600-c64x.pl
GENERATE[sha256-riscv64-zvbb-zvknha.S]=asm/sha256-riscv64-zvbb-zvknha.pl
# These are not yet used
GENERATE[keccak1600-avx2.S]=asm/keccak1600-avx2.pl
GENERATE[keccak1600-avx512.S]=asm/keccak1600-avx512.pl
......
/*
* Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#include <stdlib.h>
#include <string.h>
#include <openssl/opensslconf.h>
#include <openssl/sha.h>
#include "crypto/riscv_arch.h"
void sha256_block_data_order_zvbb_zvknha(void *ctx, const void *in, size_t num);
void sha256_block_data_order_c(void *ctx, const void *in, size_t num);
void sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num);
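/*
 * Runtime dispatch: use the vector-crypto routine when the Zvbb and
 * Zvknha extensions are present and VLEN is at least 128 bits;
 * otherwise fall back to the portable C implementation.
 */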
void sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num)
{
if (RISCV_HAS_ZVBB_AND_ZVKNHA() && riscv_vlen() >= 128) {
sha256_block_data_order_zvbb_zvknha(ctx, in, num);
} else {
sha256_block_data_order_c(ctx, in, num);
}
}
@@ -37,6 +37,7 @@ RISCV_DEFINE_CAP(ZVBB, 0, 15)
RISCV_DEFINE_CAP(ZVBC, 0, 16)
RISCV_DEFINE_CAP(ZVKG, 0, 17)
RISCV_DEFINE_CAP(ZVKNED, 0, 18)
RISCV_DEFINE_CAP(ZVKNHA, 0, 19)
/*
* In the future ...
......
@@ -60,6 +60,7 @@ static const size_t kRISCVNumCaps =
#define RISCV_HAS_ZBB_AND_ZBC() (RISCV_HAS_ZBB() && RISCV_HAS_ZBC())
#define RISCV_HAS_ZBKB_AND_ZKND_AND_ZKNE() (RISCV_HAS_ZBKB() && RISCV_HAS_ZKND() && RISCV_HAS_ZKNE())
#define RISCV_HAS_ZKND_AND_ZKNE() (RISCV_HAS_ZKND() && RISCV_HAS_ZKNE())
#define RISCV_HAS_ZVBB_AND_ZVKNHA() (RISCV_HAS_ZVBB() && RISCV_HAS_ZVKNHA())
/*
* Get the size of a vector register in bits (VLEN).
......