提交 0066590f 编写于 作者: A Andy Polyakov

Pedantic polish to aes-ia64 and sha512-ia64.

上级 165a28ab
......@@ -24,7 +24,9 @@
rk0=r8; rk1=r9;
prsave=r10;
pfssave=r2;
lcsave=r10;
prsave=r3;
maskff=r11;
twenty4=r14;
sixteen=r15;
......@@ -67,6 +69,9 @@ te0=r40; te1=r41; te2=r42; te3=r43;
// Clobber: r16-r31,rk0-rk1,r32-r43
.align 32
_ia64_AES_encrypt:
.prologue
.altrp b6
.body
{ .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 }
......@@ -179,20 +184,21 @@ _ia64_AES_encrypt:
.skip 16
AES_encrypt:
.prologue
.save ar.pfs,r2
{ .mmi; alloc r2=ar.pfs,3,0,12,0
addl out8=@ltoff(AES_Te#),gp
.save ar.lc,r3
mov r3=ar.lc }
{ .mmi; and out0=3,in0
ADDP in0=0,in0
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
.save ar.pfs,pfssave
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
and out0=3,in0
mov r3=ip }
{ .mmi; ADDP in0=0,in0
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
.save ar.lc,lcsave
mov lcsave=ar.lc };;
.body
{ .mmi; ld8 out8=[out8] // Te0
ld4 out11=[out11] // AES_KEY->rounds
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
add out8=(AES_Te#-AES_encrypt#),r3 // Te0
.save pr,prsave
mov prsave=pr }
.body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0
......@@ -215,8 +221,8 @@ AES_encrypt:
ADDP in1=0,in1
(p6) br.spnt .Le_o_unaligned };;
{ .mii; mov ar.pfs=r2
mov ar.lc=r3 }
{ .mii; mov ar.pfs=pfssave	// restore ar.pfs from the register written by alloc in the prologue
mov ar.lc=lcsave }		// restore ar.lc from the register it was saved to in the prologue
{ .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };;
......@@ -299,10 +305,10 @@ AES_encrypt:
mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4
mov ar.pfs=r2 };;
mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28
st1 [out2]=r29
mov ar.lc=r3 }//;;
mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30
st1 [out0]=r31
br.ret.sptk.many b0 };;
......@@ -359,6 +365,9 @@ while(<>) {
// Clobber: r16-r31,rk0-rk1,r32-r43
.align 32
_ia64_AES_decrypt:
.prologue
.altrp b6
.body
{ .mmi; alloc r16=ar.pfs,12,0,0,8
LDKEY t0=[rk0],2*KSZ
mov pr.rot=1<<16 }
......@@ -471,20 +480,21 @@ _ia64_AES_decrypt:
.skip 16
AES_decrypt:
.prologue
.save ar.pfs,r2
{ .mmi; alloc r2=ar.pfs,3,0,12,0
addl out8=@ltoff(AES_Td#),gp
.save ar.lc,r3
mov r3=ar.lc }
{ .mmi; and out0=3,in0
ADDP in0=0,in0
ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
.save ar.pfs,pfssave
{ .mmi; alloc pfssave=ar.pfs,3,0,12,0
and out0=3,in0
mov r3=ip }
{ .mmi; ADDP in0=0,in0
ADDP out11=KSZ*60,in2 // &AES_KEY->rounds
.save ar.lc,lcsave
mov lcsave=ar.lc };;
.body
{ .mmi; ld8 out8=[out8] // Te0
ld4 out11=[out11] // AES_KEY->rounds
{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
add out8=(AES_Td#-AES_decrypt#),r3 // Td0
.save pr,prsave
mov prsave=pr }
.body
#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
{ .mib; cmp.ne p6,p0=out0,r0
add out0=4,in0
......@@ -507,8 +517,8 @@ AES_decrypt:
ADDP in1=0,in1
(p6) br.spnt .Ld_o_unaligned };;
{ .mii; mov ar.pfs=r2
mov ar.lc=r3 }
{ .mii; mov ar.pfs=pfssave
mov ar.lc=lcsave }
{ .mmi; st4 [in1]=r16,8 // s0
st4 [in0]=r20,8 // s1
mov pr=prsave,0x1ffff };;
......@@ -591,10 +601,10 @@ AES_decrypt:
mov pr=prsave,0x1ffff }//;;
{ .mmi; st1 [out1]=r26,4
st1 [out0]=r27,4
mov ar.pfs=r2 };;
mov ar.pfs=pfssave };;
{ .mmi; st1 [out3]=r28
st1 [out2]=r29
mov ar.lc=r3 }//;;
mov ar.lc=lcsave }//;;
{ .mmb; st1 [out1]=r30
st1 [out0]=r31
br.ret.sptk.many b0 };;
......
......@@ -110,6 +110,8 @@ $code=<<___;
.explicit
.text
pfssave=r2;
lcsave=r3;
prsave=r14;
K=r15;
A=r16; B=r17; C=r18; D=r19;
......@@ -128,20 +130,17 @@ sgm0=r50; sgm1=r51; // small constants
.align 32
$func:
.prologue
.save ar.pfs,r2
{ .mmi; alloc r2=ar.pfs,3,17,0,16
.save ar.pfs,pfssave
{ .mmi; alloc pfssave=ar.pfs,3,17,0,16
$ADDP ctx=0,r32 // 1st arg
.save ar.lc,r3
mov r3=ar.lc }
.save ar.lc,lcsave
mov lcsave=ar.lc }
{ .mmi; $ADDP input=0,r33 // 2nd arg
addl Ktbl=\@ltoff($TABLE#),gp
mov num=r34 // 3rd arg
.save pr,prsave
mov prsave=pr };;
.body
{ .mii; ld8 Ktbl=[Ktbl]
mov num=r34 };; // 3rd arg
{ .mib; add r8=0*$SZ,ctx
add r9=1*$SZ,ctx
brp.loop.imp .L_first16,.L_first16_ctop
......@@ -151,20 +150,23 @@ $func:
brp.loop.imp .L_rest,.L_rest_ctop
};;
// load A-H
.Lpic_point:
{ .mmi; $LDW A=[r8],4*$SZ
$LDW B=[r9],4*$SZ
mov sgm0=$sigma0[2] }
mov Ktbl=ip }
{ .mmi; $LDW C=[r10],4*$SZ
$LDW D=[r11],4*$SZ
mov sgm1=$sigma1[2] };;
mov sgm0=$sigma0[2] };;
{ .mmi; $LDW E=[r8]
$LDW F=[r9] }
$LDW F=[r9]
add Ktbl=($TABLE#-.Lpic_point),Ktbl }
{ .mmi; $LDW G=[r10]
$LDW H=[r11]
cmp.ne p15,p14=0,r35 };; // used in sha256_block
.L_outer:
{ .mii; mov ar.lc=15
{ .mii; mov sgm1=$sigma1[2]
mov ar.lc=15
mov ar.ec=1 };;
.align 32
.L_first16:
......@@ -329,7 +331,7 @@ $code.=<<___;
(p6) add Ktbl=-$SZ*$rounds,Ktbl }
{ .mmi; $LDW r38=[r10],-4*$SZ
$LDW r39=[r11],-4*$SZ
(p7) mov ar.lc=r3 };;
(p7) mov ar.lc=lcsave };;
{ .mmi; add A=A,r32
add B=B,r33
add C=C,r34 }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册