Latest bn_mont.c modification broke ECDSA test. I've got math wrong, which

is fixed now.

Latest bn_mont.c modification broke ECDSA test. I've got math wrong, which
is fixed now.
673c55a2 · Andy Polyakov · 949ce10e · 673c55a2 · 673c55a2 · 673c55a2
11 changed file
--- a/crypto/bn/asm/alpha-mont.pl
+++ b/crypto/bn/asm/alpha-mont.pl
@@ -262,8 +262,6 @@ bn_mul_mont:
 	mov	$rp,$bp		# put rp aside
 	mov	sp,$tp
 	mov	sp,$ap
-	srl	$nj,62,AT	# boundary condition...
-	beq	AT,.Lcopy	# ... is met
 	mov	0,$hi0		# clear borrow bit

 .align	4

--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -163,9 +163,6 @@ bn_mul_mont:
 	mov	$ap,$tp			@ "borrow" $ap
 	sub	$np,$np,$aj		@ "rewind" $np to &np[0]

-	movs	$tj,$nj,lsr#30		@ boundary condition...
-	beq	.Lcopy			@ ... is met
-
 	subs	$tj,$tj,$tj		@ "clear" carry flag
 .Lsub:	ldr	$tj,[$tp],#4
 	ldr	$nj,[$np],#4

--- a/crypto/bn/asm/mips3-mont.pl
+++ b/crypto/bn/asm/mips3-mont.pl
@@ -270,9 +270,6 @@ bn_mul_mont:
 	PTR_ADD	$tj,sp,$num	# &tp[num]
 	move	$tp,sp
 	move	$ap,sp
-
-	dsrl	AT,$nj,62	# boundary condition...
-	beqz	AT,.Lcopy	# ... is met
 	li	$hi0,0		# clear borrow bit

 .align	4

--- a/crypto/bn/asm/ppc-mont.pl
+++ b/crypto/bn/asm/ppc-mont.pl
@@ -267,13 +267,11 @@ Linner:
 	addi	$i,$i,$BNSZ
 	ble-	Louter

-	$SHRI.	$nj,$nj,$BITS-2	; check boundary condition
 	addi	$num,$num,2	; restore $num
 	subfc	$j,$j,$j	; j=0 and "clear" XER[CA]
 	addi	$tp,$sp,$FRAME
 	addi	$ap,$sp,$FRAME
 	mtctr	$num
-	beq	Lcopy		; boundary condition is met

 .align	4
 Lsub:	$LDX	$tj,$tp,$j

--- a/crypto/bn/asm/s390x-mont.pl
+++ b/crypto/bn/asm/s390x-mont.pl
@@ -183,12 +183,6 @@ $code.=<<___;
 	la	$ap,8($fp)
 	lgr	$j,$num

-	#lg	$nhi,-8($np)		# buggy assembler
-	lghi	$count,-8		# buggy assembler
-	lg	$nhi,0($count,$np)	# buggy assembler
-	srag	$nhi,$nhi,62	# boundary condition...
-	jz	.Lcopy		# ... is met
-
 	lcgr	$count,$num
 	sra	$count,3	# incidentally clears "borrow"
 .Lsub:	lg	$alo,0($j,$ap)

--- a/crypto/bn/asm/sparcv9-mont.pl
+++ b/crypto/bn/asm/sparcv9-mont.pl
@@ -257,11 +257,6 @@ $fname:
 	add	$rp,$num,$rp
 	mov	$tp,$ap
 	sub	%g0,$num,%o7		! k=-num
-
-	srl	$npj,30,%o0		! boundary condition...
-	brz,pn	%o0,.Lcopy		! ... is met
-	nop
-
 	ba	.Lsub
 	subcc	%g0,%g0,%g0		! clear %icc.c
 .align	16

--- a/crypto/bn/asm/sparcv9a-mont.pl
+++ b/crypto/bn/asm/sparcv9a-mont.pl
@@ -798,15 +798,11 @@ $fname:
 	bnz	%icc,.Louter
 	nop

-	ld	[$np-4],%o1
-	subcc	%g0,%g0,%g0		! clear %icc.c
 	add	$tp,8,$tp		! adjust tp to point at the end
-	srl	%o1,30,%o1		! boundary condition...
 	orn	%g0,%g0,%g4
-	brz,pn	%o1,.Lcopy		! ... is met
 	sub	%g0,$num,%o7		! n=-num
 	ba	.Lsub
-	nop
+	subcc	%g0,%g0,%g0		! clear %icc.c

 .align	32
 .Lsub:

--- a/crypto/bn/asm/via-mont.pl
+++ b/crypto/bn/asm/via-mont.pl
@@ -187,17 +187,12 @@ $sp=&DWP(28,"esp");
 	&data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul

 	&mov	("ecx","ebp");
-	&xor	("edx","edx");			# i=0
 	&lea	("esi",&DWP(64,"esp"));		# tp
 	# edi still points at the end of padded np copy...
-	&mov	("eax",&DWP(-4-$pad,"edi"));	# np[num-1]
 	&neg	("ebp");
 	&lea	("ebp",&DWP(-$pad,"edi","ebp",4));	# so just "rewind"
 	&mov	("edi",$rp);			# restore rp
-
-	&shr	("eax",30);			# boundary condition...
-	&jz	(&label("copy"));		# ... is met
-	&xor	("edx","edx");			# clear CF
+	&xor	("edx","edx");			# i=0 and clear CF

 &set_label("sub",8);
 	&mov	("eax",&DWP(0,"esi","edx",4));

--- a/crypto/bn/asm/x86-mont.pl
+++ b/crypto/bn/asm/x86-mont.pl
@@ -554,9 +554,6 @@ $sbit=$num;
 	&mov	($np,$_np);			# load modulus pointer
 	&mov	($rp,$_rp);			# load result pointer
 	&lea	($tp,&DWP($frame,"esp"));	# [$ap and $bp are zapped]
-	&mov	("eax",&DWP(0,$np,$num,4));	# np[num-1]
-	&shr	("eax",30);			# check for boundary condition
-	&jz	(&label("copy"));

 	&mov	("eax",&DWP(0,$tp));		# tp[0]
 	&mov	($j,$num);			# j=num-1

--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -167,11 +167,8 @@ bn_mul_mont:
 	cmp	$num,$i
 	jl	.Louter

-	mov	-8($np,$num,8),%rax	# np[num-1]
 	lea	(%rsp),$ap		# borrow ap for tp
-	shr	\$62,%rax		# check for boundary condition
 	lea	-1($num),$j		# j=num-1
-	jz	.Lcopy

 	mov	($ap),%rax		# tp[0]
 	xor	$i,$i			# i=0 and clear CF!
@@ -198,7 +195,7 @@ bn_mul_mont:
 	mov	$i,(%rsp,$j,8)		# zap temporary vector
 	dec	$j
 	jge	.Lcopy
-	
+
 	mov	8(%rsp,$num,8),%rsp	# restore %rsp
 	mov	\$1,%rax
 	pop	%r15

--- a/crypto/bn/bn_mont.c
+++ b/crypto/bn/bn_mont.c
@@ -243,7 +243,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)

 	/* mont->ri will be a multiple of the word size and below code
 	 * is kind of BN_rshift(ret,r,mont->ri) equivalent */
-	if (r->top < ri)
+	if (r->top <= ri)
 		{
 		ret->top=0;
 		return(1);
@@ -259,32 +259,26 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)

 	rp=ret->d;
 	ap=&(r->d[ri]);
-	nrp=ap;

-	/* This 'if' denotes violation of 2*M<r^(n-1) boundary condition
-	 * formulated by C.D.Walter in "Montgomery exponentiation needs
-	 * no final subtractions." Incurred branch can disclose only
-	 * information about modulus length, which is not really secret. */
-	if ((mont->N.d[ri-1]>>(BN_BITS2-2))!=0)
-		{
-		size_t m1,m2;
-
-		v=bn_sub_words(rp,ap,mont->N.d,ri);
-		/* this -----------------------^^ works even in al<ri case
-		 * thanks to zealous zeroing of top of the vector in the
-		 * beginning. */
-
-		/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
-		/* in other words if subtraction result is real, then
-		 * trick unconditional memcpy below to perform in-place
-		 * "refresh" instead of actual copy. */
-		m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1);	/* al<ri */
-		m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1);	/* al>ri */
-		m1|=m2;			/* (al!=ri) */
-		m1|=(0-(size_t)v);	/* (al!=ri || v) */
-		m1&=~m2;		/* (al!=ri || v) && !al>ri */
-		nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
-		}
+	{
+	size_t m1,m2;
+
+	v=bn_sub_words(rp,ap,np,ri);
+	/* this ----------------^^ works even in al<ri case
+	 * thanks to zealous zeroing of top of the vector in the
+	 * beginning. */
+
+	/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
+	/* in other words if subtraction result is real, then
+	 * trick unconditional memcpy below to perform in-place
+	 * "refresh" instead of actual copy. */
+	m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1);	/* al<ri */
+	m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1);	/* al>ri */
+	m1|=m2;			/* (al!=ri) */
+	m1|=(0-(size_t)v);	/* (al!=ri || v) */
+	m1&=~m2;		/* (al!=ri || v) && !al>ri */
+	nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
+	}

 	/* 'i<ri' is chosen to eliminate dependency on input data, even
 	 * though it results in redundant copy in al<ri case. */