Commit 053fa39a authored by Richard Levitte

Conversion to UTF-8 where needed

This leaves behind files with names ending with '.iso-8859-1'.  These
should be safe to remove.  If something went wrong when re-encoding,
there will be some files with names ending with '.utf8' left behind.
Reviewed-by: Rich Salz <rsalz@openssl.org>
Parent f608b406
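
The message above describes a mechanical re-encode of ISO-8859-1 files to UTF-8. The script that performed it is not part of the commit; what follows is a minimal sketch of the kind of helper it describes (hypothetical, not from this tree), with the same leftover-file behaviour: the original parked under a '.iso-8859-1' name, and a '.utf8' scratch file left behind only if a step fails.

#!/usr/bin/env perl
# recode-to-utf8.pl -- hypothetical helper, not from this commit:
# rewrite one file from ISO-8859-1 to UTF-8 via a '.utf8' scratch copy.
use strict;
use warnings;
use Encode qw(decode encode);

my $file = shift or die "usage: $0 file\n";

open my $in, '<:raw', $file or die "$file: $!";
my $bytes = do { local $/; <$in> };     # slurp raw bytes
close $in;

open my $out, '>:raw', "$file.utf8" or die "$file.utf8: $!";
print $out encode('utf-8', decode('iso-8859-1', $bytes));
close $out or die "$file.utf8: $!";

# Keep the original, then promote the UTF-8 copy; if either rename
# fails, the '.utf8' scratch file is what gets left behind.
rename $file, "$file.iso-8859-1" or die "rename: $!";
rename "$file.utf8", $file       or die "rename: $!";
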
@@ -45,7 +45,7 @@
 # the undertaken effort was that it appeared that in tight IA-32
 # register window little-endian flavor could achieve slightly higher
 # Instruction Level Parallelism, and it indeed resulted in up to 15%
-# better performance on most recent µ-archs...
+# better performance on most recent µ-archs...
 #
 # Third version adds AES_cbc_encrypt implementation, which resulted in
 # up to 40% performance imrovement of CBC benchmark results. 40% was
@@ -224,7 +224,7 @@ sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } }
 $speed_limit=512;      # chunks smaller than $speed_limit are
                        # processed with compact routine in CBC mode
 $small_footprint=1;    # $small_footprint=1 code is ~5% slower [on
-                       # recent µ-archs], but ~5 times smaller!
+                       # recent µ-archs], but ~5 times smaller!
                        # I favor compact code to minimize cache
                        # contention and in hope to "collect" 5% back
                        # in real-life applications...
@@ -565,7 +565,7 @@ sub enctransform()
 # Performance is not actually extraordinary in comparison to pure
 # x86 code. In particular encrypt performance is virtually the same.
 # Decrypt performance on the other hand is 15-20% better on newer
-# µ-archs [but we're thankful for *any* improvement here], and ~50%
+# µ-archs [but we're thankful for *any* improvement here], and ~50%
 # better on PIII:-) And additionally on the pros side this code
 # eliminates redundant references to stack and thus relieves/
 # minimizes the pressure on the memory bus.
...
@@ -891,7 +891,7 @@ ret?:                          ; B0 holds rounds or zero
        MVC     B0,ILC
||      SUB     B0,1,B0
-       GMPY4   $K[0],A24,$Kx9[0]       ; 0x09
+       GMPY4   $K[0],A24,$Kx9[0]       ; ·0x09
||      GMPY4   $K[1],A24,$Kx9[1]
||      MVK     0x00000D0D,A25
||      MVK     0x00000E0E,B25
@@ -900,14 +900,14 @@ ret?:                         ; B0 holds rounds or zero
||      MVKH    0x0D0D0000,A25
||      MVKH    0x0E0E0000,B25
-       GMPY4   $K[0],B24,$KxB[0]       ; 0x0B
+       GMPY4   $K[0],B24,$KxB[0]       ; ·0x0B
||      GMPY4   $K[1],B24,$KxB[1]
        GMPY4   $K[2],B24,$KxB[2]
||      GMPY4   $K[3],B24,$KxB[3]
        SPLOOP  11                      ; InvMixColumns
;;====================================================================
-       GMPY4   $K[0],A25,$KxD[0]       ; 0x0D
+       GMPY4   $K[0],A25,$KxD[0]       ; ·0x0D
||      GMPY4   $K[1],A25,$KxD[1]
||      SWAP2   $Kx9[0],$Kx9[0]         ; rotate by 16
||      SWAP2   $Kx9[1],$Kx9[1]
@@ -924,7 +924,7 @@ ret?:                          ; B0 holds rounds or zero
|| [B0] LDW     *${KPA}[6],$K[2]
|| [B0] LDW     *${KPB}[7],$K[3]
-       GMPY4   $s[0],B25,$KxE[0]       ; 0x0E
+       GMPY4   $s[0],B25,$KxE[0]       ; ·0x0E
||      GMPY4   $s[1],B25,$KxE[1]
||      XOR     $Kx9[0],$KxB[0],$KxB[0]
||      XOR     $Kx9[1],$KxB[1],$KxB[1]
@@ -944,7 +944,7 @@ ret?:                          ; B0 holds rounds or zero
        XOR     $KxE[0],$KxD[0],$KxE[0]
||      XOR     $KxE[1],$KxD[1],$KxE[1]
-|| [B0] GMPY4   $K[0],A24,$Kx9[0]       ; 0x09
+|| [B0] GMPY4   $K[0],A24,$Kx9[0]       ; ·0x09
|| [B0] GMPY4   $K[1],A24,$Kx9[1]
||      ADDAW   $KPA,4,$KPA
        XOR     $KxE[2],$KxD[2],$KxE[2]
@@ -955,7 +955,7 @@ ret?:                          ; B0 holds rounds or zero
        XOR     $KxB[0],$KxE[0],$KxE[0]
||      XOR     $KxB[1],$KxE[1],$KxE[1]
-|| [B0] GMPY4   $K[0],B24,$KxB[0]       ; 0x0B
+|| [B0] GMPY4   $K[0],B24,$KxB[0]       ; ·0x0B
|| [B0] GMPY4   $K[1],B24,$KxB[1]
        XOR     $KxB[2],$KxE[2],$KxE[2]
||      XOR     $KxB[3],$KxE[3],$KxE[3]
...
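
The ·0x09, ·0x0B, ·0x0D and ·0x0E annotations above are the InvMixColumns coefficients; GMPY4 multiplies four byte lanes in GF(2^8) at once. For a scalar reference point, one byte can be multiplied with an xtime loop (an illustrative sketch, assuming the AES reduction polynomial x^8+x^4+x^3+x+1; not code from the tree):

sub gmul {                              # one-byte GF(2^8) multiply
    my ($a, $b) = @_;
    my $r = 0;
    for (1 .. 8) {
        $r ^= $a if $b & 1;             # add $a where $b has a set bit
        $a = (($a << 1) & 0xff) ^ ($a & 0x80 ? 0x1b : 0);   # xtime
        $b >>= 1;
    }
    return $r;
}
# InvMixColumns combines gmul($byte,0x0e), gmul($byte,0x0b),
# gmul($byte,0x0d) and gmul($byte,0x09) across each state column.
printf "%02x\n", gmul(0x57, 0x0e);
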
@@ -27,7 +27,7 @@
 # referred below, which improves ECDH and ECDSA verify benchmarks
 # by 18-40%.
 #
-# Cmara, D.; Gouva, C. P. L.; Lpez, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
 # Polynomial Multiplication on ARM Processors using the NEON Engine.
 #
 # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -148,7 +148,7 @@ ___
 ################
 # void bn_GF2m_mul_2x2(BN_ULONG *r,
 #      BN_ULONG a1,BN_ULONG a0,
-#      BN_ULONG b1,BN_ULONG b0);       # r[3..0]=a1a0b1b0
+#      BN_ULONG b1,BN_ULONG b0);       # r[3..0]=a1a0·b1b0
 {
 $code.=<<___;
 .global bn_GF2m_mul_2x2
@@ -171,7 +171,7 @@ $code.=<<___;
        mov     $mask,#7<<2
        sub     sp,sp,#32               @ allocate tab[8]
-       bl      mul_1x1_ialu            @ a1b1
+       bl      mul_1x1_ialu            @ a1·b1
        str     $lo,[$ret,#8]
        str     $hi,[$ret,#12]
@@ -181,13 +181,13 @@ $code.=<<___;
        eor     r2,r2,$a
        eor     $b,$b,r3
        eor     $a,$a,r2
-       bl      mul_1x1_ialu            @ a0b0
+       bl      mul_1x1_ialu            @ a0·b0
        str     $lo,[$ret]
        str     $hi,[$ret,#4]
        eor     $a,$a,r2
        eor     $b,$b,r3
-       bl      mul_1x1_ialu            @ (a1+a0)(b1+b0)
+       bl      mul_1x1_ialu            @ (a1+a0)·(b1+b0)
___
 @r=map("r$_",(6..9));
 $code.=<<___;
...
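
The a1·b1, a0·b0 and (a1+a0)·(b1+b0) products above are one level of Karatsuba over GF(2)[x]: addition is XOR, so the middle word of the result is the third product XORed with the other two. A toy check on 8-bit halves, small enough for plain integers (an illustrative sketch, not code from the tree):

sub clmul {                             # carry-less (GF(2)[x]) multiply
    my ($a, $b) = @_;
    my $r = 0;
    for (my $i = 0; $b >> $i; $i++) {
        $r ^= $a << $i if ($b >> $i) & 1;
    }
    return $r;
}
my ($a1, $a0, $b1, $b0) = (0xC3, 0x5A, 0x99, 0x2F);
my $hh  = clmul($a1, $b1);                          # a1·b1
my $ll  = clmul($a0, $b0);                          # a0·b0
my $mid = clmul($a1 ^ $a0, $b1 ^ $b0) ^ $hh ^ $ll;  # cross word
# Three half-width multiplications reassemble the full product:
printf "%08x == %08x\n", ($hh << 16) ^ ($mid << 8) ^ $ll,
       clmul(($a1 << 8) | $a0, ($b1 << 8) | $b0);
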
@@ -120,26 +120,26 @@ _bn_GF2m_mul_2x2:
        .asmfunc
        MVK     0xFF,$xFF
___
-&mul_1x1_upper($a0,$b0);               # a0b0
+&mul_1x1_upper($a0,$b0);               # a0·b0
 $code.=<<___;
||      MV      $b1,$B
        MV      $a1,$A
___
-&mul_1x1_merged("A28","B28",$A,$B);    # a0b0/a1b1
+&mul_1x1_merged("A28","B28",$A,$B);    # a0·b0/a1·b1
 $code.=<<___;
||      XOR     $b0,$b1,$B
        XOR     $a0,$a1,$A
___
-&mul_1x1_merged("A31","B31",$A,$B);    # a1b1/(a0+a1)(b0+b1)
+&mul_1x1_merged("A31","B31",$A,$B);    # a1·b1/(a0+a1)·(b0+b1)
 $code.=<<___;
        XOR     A28,A31,A29
-||      XOR     B28,B31,B29             ; a0b0+a1b1
+||      XOR     B28,B31,B29             ; a0·b0+a1·b1
___
-&mul_1x1_lower("A30","B30");           # (a0+a1)(b0+b1)
+&mul_1x1_lower("A30","B30");           # (a0+a1)·(b0+b1)
 $code.=<<___;
||      BNOP    B3
        XOR     A29,A30,A30
-||      XOR     B29,B30,B30             ; (a0+a1)(b0+b1)-a0b0-a1b1
+||      XOR     B29,B30,B30             ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
        XOR     B28,A30,A30
||      STW     A28,*${rp}[0]
        XOR     B30,A31,A31
...
@@ -568,7 +568,7 @@ bn_sqr_comba8:
 // I've estimated this routine to run in ~120 ticks, but in reality
 // (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
 // cycles consumed for instructions fetch? Or did I misinterpret some
-// clause in Itanium µ-architecture manual? Comments are welcomed and
+// clause in Itanium µ-architecture manual? Comments are welcomed and
 // highly appreciated.
 //
 // On Itanium 2 it takes ~190 ticks. This is because of stalls on
...
@@ -172,19 +172,19 @@ ___
 if ($SIZE_T==8) {
 my @r=map("%r$_",(6..9));
 $code.=<<___;
-       bras    $ra,_mul_1x1            # a1b1
+       bras    $ra,_mul_1x1            # a1·b1
        stmg    $lo,$hi,16($rp)
        lg      $a,`$stdframe+128+4*$SIZE_T`($sp)
        lg      $b,`$stdframe+128+6*$SIZE_T`($sp)
-       bras    $ra,_mul_1x1            # a0b0
+       bras    $ra,_mul_1x1            # a0·b0
        stmg    $lo,$hi,0($rp)
        lg      $a,`$stdframe+128+3*$SIZE_T`($sp)
        lg      $b,`$stdframe+128+5*$SIZE_T`($sp)
        xg      $a,`$stdframe+128+4*$SIZE_T`($sp)
        xg      $b,`$stdframe+128+6*$SIZE_T`($sp)
-       bras    $ra,_mul_1x1            # (a0+a1)(b0+b1)
+       bras    $ra,_mul_1x1            # (a0+a1)·(b0+b1)
        lmg     @r[0],@r[3],0($rp)
        xgr     $lo,$hi
...
@@ -14,7 +14,7 @@
 # the time being... Except that it has three code paths: pure integer
 # code suitable for any x86 CPU, MMX code suitable for PIII and later
 # and PCLMULQDQ suitable for Westmere and later. Improvement varies
-# from one benchmark and -arch to another. Below are interval values
+# from one benchmark and µ-arch to another. Below are interval values
 # for 163- and 571-bit ECDH benchmarks relative to compiler-generated
 # code:
 #
@@ -226,22 +226,22 @@ if ($sse2) {
        &push   ("edi");
        &mov    ($a,&wparam(1));
        &mov    ($b,&wparam(3));
-       &call   ("_mul_1x1_mmx");       # a1b1
+       &call   ("_mul_1x1_mmx");       # a1·b1
        &movq   ("mm7",$R);
        &mov    ($a,&wparam(2));
        &mov    ($b,&wparam(4));
-       &call   ("_mul_1x1_mmx");       # a0b0
+       &call   ("_mul_1x1_mmx");       # a0·b0
        &movq   ("mm6",$R);
        &mov    ($a,&wparam(1));
        &mov    ($b,&wparam(3));
        &xor    ($a,&wparam(2));
        &xor    ($b,&wparam(4));
-       &call   ("_mul_1x1_mmx");       # (a0+a1)(b0+b1)
+       &call   ("_mul_1x1_mmx");       # (a0+a1)·(b0+b1)
        &pxor   ($R,"mm7");
        &mov    ($a,&wparam(0));
-       &pxor   ($R,"mm6");             # (a0+a1)(b0+b1)-a1b1-a0b0
+       &pxor   ($R,"mm6");             # (a0+a1)·(b0+b1)-a1·b1-a0·b0
        &movq   ($A,$R);
        &psllq  ($R,32);
@@ -266,13 +266,13 @@ if ($sse2) {
        &mov    ($a,&wparam(1));
        &mov    ($b,&wparam(3));
-       &call   ("_mul_1x1_ialu");      # a1b1
+       &call   ("_mul_1x1_ialu");      # a1·b1
        &mov    (&DWP(8,"esp"),$lo);
        &mov    (&DWP(12,"esp"),$hi);
        &mov    ($a,&wparam(2));
        &mov    ($b,&wparam(4));
-       &call   ("_mul_1x1_ialu");      # a0b0
+       &call   ("_mul_1x1_ialu");      # a0·b0
        &mov    (&DWP(0,"esp"),$lo);
        &mov    (&DWP(4,"esp"),$hi);
@@ -280,7 +280,7 @@ if ($sse2) {
        &mov    ($b,&wparam(3));
        &xor    ($a,&wparam(2));
        &xor    ($b,&wparam(4));
-       &call   ("_mul_1x1_ialu");      # (a0+a1)(b0+b1)
+       &call   ("_mul_1x1_ialu");      # (a0+a1)·(b0+b1)
        &mov    ("ebp",&wparam(0));
 @r=("ebx","ecx","edi","esi");
...
@@ -65,7 +65,7 @@
 # undef mul_add
 /*-
- * "m"(a), "+m"(r) is the way to favor DirectPath -code;
+ * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
  * "g"(0) let the compiler to decide where does it
  * want to keep the value of zero;
  */
...
@@ -13,7 +13,7 @@
 # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
 # the time being... Except that it has two code paths: code suitable
 # for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and -arch to another.
+# later. Improvement varies from one benchmark and µ-arch to another.
 # Vanilla code path is at most 20% faster than compiler-generated code
 # [not very impressive], while PCLMULQDQ - whole 85%-160% better on
 # 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
@@ -184,13 +184,13 @@ ___
 $code.=<<___;
        movdqa          %xmm0,%xmm4
        movdqa          %xmm1,%xmm5
-       pclmulqdq       \$0,%xmm1,%xmm0 # a1b1
+       pclmulqdq       \$0,%xmm1,%xmm0 # a1·b1
        pxor            %xmm2,%xmm4
        pxor            %xmm3,%xmm5
-       pclmulqdq       \$0,%xmm3,%xmm2 # a0b0
-       pclmulqdq       \$0,%xmm5,%xmm4 # (a0+a1)(b0+b1)
+       pclmulqdq       \$0,%xmm3,%xmm2 # a0·b0
+       pclmulqdq       \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
        xorps           %xmm0,%xmm4
-       xorps           %xmm2,%xmm4     # (a0+a1)(b0+b1)-a0b0-a1b1
+       xorps           %xmm2,%xmm4     # (a0+a1)·(b0+b1)-a0·b0-a1·b1
        movdqa          %xmm4,%xmm5
        pslldq          \$8,%xmm4
        psrldq          \$8,%xmm5
@@ -225,13 +225,13 @@ $code.=<<___;
        mov     \$0xf,$mask
        mov     $a1,$a
        mov     $b1,$b
-       call    _mul_1x1                # a1b1
+       call    _mul_1x1                # a1·b1
        mov     $lo,16(%rsp)
        mov     $hi,24(%rsp)
        mov     48(%rsp),$a
        mov     64(%rsp),$b
-       call    _mul_1x1                # a0b0
+       call    _mul_1x1                # a0·b0
        mov     $lo,0(%rsp)
        mov     $hi,8(%rsp)
@@ -239,7 +239,7 @@ $code.=<<___;
        mov     56(%rsp),$b
        xor     48(%rsp),$a
        xor     64(%rsp),$b
-       call    _mul_1x1                # (a0+a1)(b0+b1)
+       call    _mul_1x1                # (a0+a1)·(b0+b1)
___
 @r=("%rbx","%rcx","%rdi","%rsi");
 $code.=<<___;
...
@@ -45,7 +45,7 @@
 # processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
 # Snapdragon S4 - in 9.33.
 #
-# Cmara, D.; Gouva, C. P. L.; Lpez, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
 # Polynomial Multiplication on ARM Processors using the NEON Engine.
 #
 # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -449,12 +449,12 @@ gcm_ghash_neon:
        veor            $IN,$Xl                 @ inp^=Xi
 .Lgmult_neon:
___
-&clmul64x64    ($Xl,$Hlo,"$IN#lo");            # H.loXi.lo
+&clmul64x64    ($Xl,$Hlo,"$IN#lo");            # H.lo·Xi.lo
 $code.=<<___;
        veor            $IN#lo,$IN#lo,$IN#hi    @ Karatsuba pre-processing
___
-&clmul64x64    ($Xm,$Hhl,"$IN#lo");            # (H.lo+H.hi)(Xi.lo+Xi.hi)
-&clmul64x64    ($Xh,$Hhi,"$IN#hi");            # H.hiXi.hi
+&clmul64x64    ($Xm,$Hhl,"$IN#lo");            # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+&clmul64x64    ($Xh,$Hhi,"$IN#hi");            # H.hi·Xi.hi
 $code.=<<___;
        veor            $Xm,$Xm,$Xl             @ Karatsuba post-processing
        veor            $Xm,$Xm,$Xh
...
@@ -153,7 +153,7 @@ ___
 # 8/2   S1 L1x S2  |    ....
 #####...          ................|............
 $code.=<<___;
-       XORMPY  $H0,$xia,$H0x           ; 0     ; H(Xi[i]<<1)
+       XORMPY  $H0,$xia,$H0x           ; 0     ; H·(Xi[i]<<1)
||      XORMPY  $H01u,$xib,$H01y
|| [A0] LDBU    *--${xip},$x0
        XORMPY  $H1,$xia,$H1x           ; 1
@@ -162,7 +162,7 @@ $code.=<<___;
        XORMPY  $H3,$xia,$H3x           ; 3
||      XORMPY  $H3u,$xib,$H3y
||[!A0] MVK.D   15,A0                   ; *--${xip} counter
-       XOR.L   $H0x,$Z0,$Z0            ; 4     ; Z^=H(Xi[i]<<1)
+       XOR.L   $H0x,$Z0,$Z0            ; 4     ; Z^=H·(Xi[i]<<1)
|| [A0] SUB.S   A0,1,A0
        XOR.L   $H1x,$Z1,$Z1            ; 5
||      AND.D   $H01y,$FF000000,$H0z
...
@@ -379,7 +379,7 @@ gcm_init_vis3:
        or      $V,%lo(0xA0406080),$V
        or      %l0,%lo(0x20C0E000),%l0
        sllx    $V,32,$V
-       or      %l0,$V,$V               ! (0xE0i)&0xff=0xA040608020C0E000
+       or      %l0,$V,$V               ! (0xE0·i)&0xff=0xA040608020C0E000
        stx     $V,[%i0+16]
        ret
@@ -399,7 +399,7 @@ gcm_gmult_vis3:
        mov     0xE1,%l7
        sllx    %l7,57,$xE1             ! 57 is not a typo
-       ldx     [$Htable+16],$V         ! (0xE0i)&0xff=0xA040608020C0E000
+       ldx     [$Htable+16],$V         ! (0xE0·i)&0xff=0xA040608020C0E000
        xor     $Hhi,$Hlo,$Hhl          ! Karatsuba pre-processing
        xmulx   $Xlo,$Hlo,$C0
@@ -411,9 +411,9 @@ gcm_gmult_vis3:
        xmulx   $Xhi,$Hhi,$Xhi
        sll     $C0,3,$sqr
-       srlx    $V,$sqr,$sqr            ! 0xE0 [implicit &(7<<3)]
+       srlx    $V,$sqr,$sqr            ! ·0xE0 [implicit &(7<<3)]
        xor     $C0,$sqr,$sqr
-       sllx    $sqr,57,$sqr            ! ($C00xE1)<<1<<56 [implicit &0x7f]
+       sllx    $sqr,57,$sqr            ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
        xor     $C0,$C1,$C1             ! Karatsuba post-processing
        xor     $Xlo,$C2,$C2
@@ -423,7 +423,7 @@ gcm_gmult_vis3:
        xor     $Xhi,$C2,$C2
        xor     $Xhi,$C1,$C1
-       xmulxhi $C0,$xE1,$Xlo           ! 0xE1<<1<<56
+       xmulxhi $C0,$xE1,$Xlo           ! ·0xE1<<1<<56
        xor     $C0,$C2,$C2
        xmulx   $C1,$xE1,$C0
        xor     $C1,$C3,$C3
@@ -453,7 +453,7 @@ gcm_ghash_vis3:
        mov     0xE1,%l7
        sllx    %l7,57,$xE1             ! 57 is not a typo
-       ldx     [$Htable+16],$V         ! (0xE0i)&0xff=0xA040608020C0E000
+       ldx     [$Htable+16],$V         ! (0xE0·i)&0xff=0xA040608020C0E000
        and     $inp,7,$shl
        andn    $inp,7,$inp
@@ -490,9 +490,9 @@ gcm_ghash_vis3:
        xmulx   $Xhi,$Hhi,$Xhi
        sll     $C0,3,$sqr
-       srlx    $V,$sqr,$sqr            ! 0xE0 [implicit &(7<<3)]
+       srlx    $V,$sqr,$sqr            ! ·0xE0 [implicit &(7<<3)]
        xor     $C0,$sqr,$sqr
-       sllx    $sqr,57,$sqr            ! ($C00xE1)<<1<<56 [implicit &0x7f]
+       sllx    $sqr,57,$sqr            ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
        xor     $C0,$C1,$C1             ! Karatsuba post-processing
        xor     $Xlo,$C2,$C2
@@ -502,7 +502,7 @@ gcm_ghash_vis3:
        xor     $Xhi,$C2,$C2
        xor     $Xhi,$C1,$C1
-       xmulxhi $C0,$xE1,$Xlo           ! 0xE1<<1<<56
+       xmulxhi $C0,$xE1,$Xlo           ! ·0xE1<<1<<56
        xor     $C0,$C2,$C2
        xmulx   $C1,$xE1,$C0
        xor     $C1,$C3,$C3
...
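
The `(0xE0·i)&0xff=0xA040608020C0E000` comments above pack eight carry-less products into the table word loaded from [$Htable+16]: byte i (for i = 7 down to 0) is the low byte of the GF(2)[x] product 0xE0·i. A quick arithmetic check (an illustrative sketch, assuming a 64-bit Perl; not code from the tree):

sub clmul8 {                    # carry-less multiply, small operands
    my ($a, $b) = @_;
    my $r = 0;
    ($b >> $_) & 1 and $r ^= $a << $_ for 0 .. 2;   # i <= 7: three bits
    return $r;
}
my $v = 0;
$v = ($v << 8) | (clmul8(0xE0, $_) & 0xff) for reverse 0 .. 7;
printf "0x%016X\n", $v;         # prints 0xA040608020C0E000
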
@@ -358,7 +358,7 @@ $S=12;         # shift factor for rem_4bit
 # effective address calculation and finally merge of value to Z.hi.
 # Reference to rem_4bit is scheduled so late that I had to >>4
 # rem_4bit elements. This resulted in 20-45% procent improvement
-# on contemporary µ-archs.
+# on contemporary µ-archs.
 {
     my $cnt;
     my $rem_4bit = "eax";
...
@@ -576,15 +576,15 @@ $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
 # experimental alternative. special thing about is that there
 # no dependency between the two multiplications...
        mov             \$`0xE1<<1`,%eax
-       mov             \$0xA040608020C0E000,%r10       # ((7..0)0xE0)&0xff
+       mov             \$0xA040608020C0E000,%r10       # ((7..0)·0xE0)&0xff
        mov             \$0x07,%r11d
        movq            %rax,$T1
        movq            %r10,$T2
        movq            %r11,$T3                # borrow $T3
        pand            $Xi,$T3
-       pshufb          $T3,$T2                 # ($Xi&7)0xE0
+       pshufb          $T3,$T2                 # ($Xi&7)·0xE0
        movq            %rax,$T3
-       pclmulqdq       \$0x00,$Xi,$T1          # (0xE1<<1)
+       pclmulqdq       \$0x00,$Xi,$T1          # ·(0xE1<<1)
        pxor            $Xi,$T2
        pslldq          \$15,$T2
        paddd           $T2,$T2                 # <<(64+56+1)
@@ -657,7 +657,7 @@ $code.=<<___;
        je              .Lskip4x
        sub             \$0x30,$len
-       mov             \$0xA040608020C0E000,%rax       # ((7..0)0xE0)&0xff
+       mov             \$0xA040608020C0E000,%rax       # ((7..0)·0xE0)&0xff
        movdqu          0x30($Htbl),$Hkey3
        movdqu          0x40($Htbl),$Hkey4
...
@@ -118,9 +118,9 @@ $code=<<___;
        le?vperm        $IN,$IN,$IN,$lemask
        vxor            $zero,$zero,$zero
-       vpmsumd         $Xl,$IN,$Hl             # H.loXi.lo
-       vpmsumd         $Xm,$IN,$H              # H.hiXi.lo+H.loXi.hi
-       vpmsumd         $Xh,$IN,$Hh             # H.hiXi.hi
+       vpmsumd         $Xl,$IN,$Hl             # H.lo·Xi.lo
+       vpmsumd         $Xm,$IN,$H              # H.hi·Xi.lo+H.lo·Xi.hi
+       vpmsumd         $Xh,$IN,$Hh             # H.hi·Xi.hi
        vpmsumd         $t2,$Xl,$xC2            # 1st phase
@@ -178,11 +178,11 @@ $code=<<___;
 .align 5
 Loop:
        subic   $len,$len,16
-       vpmsumd $Xl,$IN,$Hl             # H.loXi.lo
+       vpmsumd $Xl,$IN,$Hl             # H.lo·Xi.lo
        subfe.  r0,r0,r0                # borrow?-1:0
-       vpmsumd $Xm,$IN,$H              # H.hiXi.lo+H.loXi.hi
+       vpmsumd $Xm,$IN,$H              # H.hi·Xi.lo+H.lo·Xi.hi
        and     r0,r0,$len
-       vpmsumd $Xh,$IN,$Hh             # H.hiXi.hi
+       vpmsumd $Xh,$IN,$Hh             # H.hi·Xi.hi
        add     $inp,$inp,r0
        vpmsumd $t2,$Xl,$xC2            # 1st phase
...
@@ -144,10 +144,10 @@ gcm_gmult_v8:
 #endif
        vext.8          $IN,$t1,$t1,#8
-       vpmull.p64      $Xl,$H,$IN              @ H.loXi.lo
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
        veor            $t1,$t1,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H,$IN              @ H.hiXi.hi
-       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
        vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
        veor            $t2,$Xl,$Xh
@@ -235,7 +235,7 @@ $code.=<<___;
 #endif
        vext.8          $In,$t1,$t1,#8
        veor            $IN,$IN,$Xl             @ I[i]^=Xi
-       vpmull.p64      $Xln,$H,$In             @ HIi+1
+       vpmull.p64      $Xln,$H,$In             @ H·Ii+1
        veor            $t1,$t1,$In             @ Karatsuba pre-processing
        vpmull2.p64     $Xhn,$H,$In
        b               .Loop_mod2x_v8
@@ -244,14 +244,14 @@ $code.=<<___;
 .Loop_mod2x_v8:
        vext.8          $t2,$IN,$IN,#8
        subs            $len,$len,#32           @ is there more data?
-       vpmull.p64      $Xl,$H2,$IN             @ H^2.loXi.lo
+       vpmull.p64      $Xl,$H2,$IN             @ H^2.lo·Xi.lo
        cclr            $inc,lo                 @ is it time to zero $inc?
        vpmull.p64      $Xmn,$Hhl,$t1
        veor            $t2,$t2,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H2,$IN             @ H^2.hiXi.hi
+       vpmull2.p64     $Xh,$H2,$IN             @ H^2.hi·Xi.hi
        veor            $Xl,$Xl,$Xln            @ accumulate
-       vpmull2.p64     $Xm,$Hhl,$t2            @ (H^2.lo+H^2.hi)(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xm,$Hhl,$t2            @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
        vld1.64         {$t0},[$inp],$inc       @ load [rotated] I[i+2]
        veor            $Xh,$Xh,$Xhn
@@ -276,7 +276,7 @@ $code.=<<___;
        vext.8          $In,$t1,$t1,#8
        vext.8          $IN,$t0,$t0,#8
        veor            $Xl,$Xm,$t2
-       vpmull.p64      $Xln,$H,$In             @ HIi+1
+       vpmull.p64      $Xln,$H,$In             @ H·Ii+1
        veor            $IN,$IN,$Xh             @ accumulate $IN early
        vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase of reduction
@@ -300,10 +300,10 @@ $code.=<<___;
        veor            $IN,$IN,$Xl             @ inp^=Xi
        veor            $t1,$t0,$t2             @ $t1 is rotated inp^Xi
-       vpmull.p64      $Xl,$H,$IN              @ H.loXi.lo
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
        veor            $t1,$t1,$IN             @ Karatsuba pre-processing
-       vpmull2.p64     $Xh,$H,$IN              @ H.hiXi.hi
-       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)(Xi.lo+Xi.hi)
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
        vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
        veor            $t2,$Xl,$Xh
...
@@ -44,7 +44,7 @@
 # Sandy Bridge  5.0/+8%
 # Atom          12.6/+6%
 # VIA Nano      6.4/+9%
-# Ivy Bridge    4.9/0%
+# Ivy Bridge    4.9/±0%
 # Bulldozer     4.9/+15%
 #
 # (*)   PIII can actually deliver 6.6 cycles per byte with MMX code,
...
@@ -56,7 +56,7 @@
 # achieves respectful 432MBps on 2.8GHz processor now. For reference.
 # If executed on Xeon, current RC4_CHAR code-path is 2.7x faster than
 # RC4_INT code-path. While if executed on Opteron, it's only 25%
-# slower than the RC4_INT one [meaning that if CPU -arch detection
+# slower than the RC4_INT one [meaning that if CPU µ-arch detection
 # is not implemented, then this final RC4_CHAR code-path should be
 # preferred, as it provides better *all-round* performance].
...
@@ -66,9 +66,9 @@
 # switch to AVX alone improves performance by as little as 4% in
 # comparison to SSSE3 code path. But below result doesn't look like
 # 4% improvement... Trouble is that Sandy Bridge decodes 'ro[rl]' as
-# pair of µ-ops, and it's the additional µ-ops, two per round, that
+# pair of µ-ops, and it's the additional µ-ops, two per round, that
 # make it run slower than Core2 and Westmere. But 'sh[rl]d' is decoded
-# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
+# as single µ-op by Sandy Bridge and it's replacing 'ro[rl]' with
 # equivalent 'sh[rl]d' that is responsible for the impressive 5.1
 # cycles per processed byte. But 'sh[rl]d' is not something that used
 # to be fast, nor does it appear to be fast in upcoming Bulldozer
...
@@ -10,7 +10,7 @@
 # SHA256 block transform for x86. September 2007.
 #
 # Performance improvement over compiler generated code varies from
-# 10% to 40% [see below]. Not very impressive on some -archs, but
+# 10% to 40% [see below]. Not very impressive on some µ-archs, but
 # it's 5 times smaller and optimizies amount of writes.
 #
 # May 2012.
...
@@ -37,7 +37,7 @@
 #
 # IALU code-path is optimized for elder Pentiums. On vanilla Pentium
 # performance improvement over compiler generated code reaches ~60%,
-# while on PIII - ~35%. On newer -archs improvement varies from 15%
+# while on PIII - ~35%. On newer µ-archs improvement varies from 15%
 # to 50%, but it's less important as they are expected to execute SSE2
 # code-path, which is commonly ~2-3x faster [than compiler generated
 # code]. SSE2 code-path is as fast as original sha512-sse2.pl, even
...
@@ -127,7 +127,7 @@ OPENSSL_wipe_cpu:
        fmovs   %f1,%f3
        fmovs   %f0,%f2
-       add     %fp,BIAS,%i0    ! return pointer to callers top of stack
+       add     %fp,BIAS,%i0    ! return pointer to caller´s top of stack
        ret
        restore
...
@@ -16,7 +16,7 @@
 # table]. I stick to value of 2 for two reasons: 1. smaller table
 # minimizes cache trashing and thus mitigates the hazard of side-
 # channel leakage similar to AES cache-timing one; 2. performance
-# gap among different -archs is smaller.
+# gap among different µ-archs is smaller.
 #
 # Performance table lists rounded amounts of CPU cycles spent by
 # whirlpool_block_mmx routine on single 64 byte input block, i.e.
...
@@ -3,7 +3,7 @@
  * Contributed to the OpenSSL Project 2004 by Richard Levitte
  * (richard@levitte.org)
  */
-/* Copyright (c) 2004 Kungliga Tekniska Hgskolan
+/* Copyright (c) 2004 Kungliga Tekniska Högskolan
  * (Royal Institute of Technology, Stockholm, Sweden).
  * All rights reserved.
  *
...
@@ -3,7 +3,7 @@
  * Contributed to the OpenSSL Project 2004 by Richard Levitte
  * (richard@levitte.org)
  */
-/* Copyright (c) 2004 Kungliga Tekniska Högskolan
+/* Copyright (c) 2004 Kungliga Tekniska Högskolan
  * (Royal Institute of Technology, Stockholm, Sweden).
  * All rights reserved.
  *
...
@@ -62,4 +62,4 @@ As noted above, easy_tls.c will be changed to become a library one
 day, which means that future revisions will not be fully compatible to
 the current version.
-Bodo Möller <bodo@openssl.org>
+Bodo Möller <bodo@openssl.org>
@@ -57,7 +57,7 @@ BEGIN
            VALUE "ProductVersion", "$version\\0"
            // Optional:
            //VALUE "Comments", "\\0"
-           VALUE "LegalCopyright", "Copyright 1998-2006 The OpenSSL Project. Copyright 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0"
+           VALUE "LegalCopyright", "Copyright © 1998-2006 The OpenSSL Project. Copyright © 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0"
            //VALUE "LegalTrademarks", "\\0"
            //VALUE "PrivateBuild", "\\0"
            //VALUE "SpecialBuild", "\\0"
...