ARMv8 assembly pack: add Cortex performance numbers.

0f777aeb · Andy Polyakov · eef1827f · 0f777aeb · 0f777aeb · 0f777aeb
Showing 3 changed files with 12 additions and 4 deletions

crypto/modes/asm/ghashv8-armx.pl crypto/modes/asm/ghashv8-armx.pl +2 -1

crypto/sha/asm/sha1-armv8.pl crypto/sha/asm/sha1-armv8.pl +2 -1

crypto/sha/asm/sha512-armv8.pl crypto/sha/asm/sha512-armv8.pl +8 -2

未找到文件。
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -20,7 +20,8 @@
 #
 #		PMULL[2]	32-bit NEON(*)
 # Apple A7	1.76		5.62
-# Cortex-A5x	n/a		n/a
+# Cortex-A53	1.45		8.39
+# Cortex-A57	2.22		7.61
 #
 # (*)	presented for reference/comparison purposes;


--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -14,7 +14,8 @@
 #
 #		hardware-assisted	software(*)
 # Apple A7	2.31			4.13 (+14%)
-# Cortex-A5x	n/a			n/a
+# Cortex-A53	2.19			8.73 (+108%)
+# Cortex-A57	2.35			7.88 (+74%)
 #
 # (*)	Software results are presented mostly for reference purposes.


--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -14,12 +14,18 @@
 #
 #		SHA256-hw	SHA256(*)	SHA512
 # Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-# Cortex-A5x	n/a		n/a		n/a
+# Cortex-A53	2.38		15.6 (+110%)	10.1 (+190%(***))
+# Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
 # 
 # (*)	Software SHA256 results are of lesser relevance, presented
 #	mostly for informational purposes.
 # (**)	The result is a trade-off: it's possible to improve it by
-#	10%, but at the cost of 20% loss on Cortex-A5x.
+#	10% (or by 1 cycle per round), but at the cost of 20% loss
+#	on Cortex-A53 (or by 4 cycles per round).
+# (***)	Super-impressive coefficients over gcc-generated code are
+#	an indication of some compiler "pathology", most notably code
+#	generated with -mgeneral-regs-only is significantly faster
+#	and lags behind assembly only by 50-90%.

 $flavour=shift;
 $output=shift;