提交 80451542，作者: Andy Polyakov

+20% performance improvement of P4-specific RC4_CHAR loop.

上级 81a86fcf
...@@ -200,22 +200,23 @@ sub RC4 ...@@ -200,22 +200,23 @@ sub RC4
&lea ($ty,&DWP(0,$in,$ty)); &lea ($ty,&DWP(0,$in,$ty));
&mov (&swtmp(2),$ty); &mov (&swtmp(2),$ty);
&movz ($tx,&BP(0,$d,$x));
# strangely enough unrolled loop performs over 20% slower... # strangely enough unrolled loop performs over 20% slower...
&set_label("RC4_CHAR_loop"); &set_label("RC4_CHAR_loop");
&movz ($tx,&BP(0,$d,$x));
&add (&LB($y),&LB($tx)); &add (&LB($y),&LB($tx));
&movz ($ty,&BP(0,$d,$y)); &movz ($ty,&BP(0,$d,$y));
&movb (&BP(0,$d,$y),&LB($tx)); &movb (&BP(0,$d,$y),&LB($tx));
&movb (&BP(0,$d,$x),&LB($ty)); &movb (&BP(0,$d,$x),&LB($ty));
&add (&LB($ty),&LB($tx)); &add (&LB($ty),&LB($tx));
&movz ($ty,&BP(0,$d,$ty)); &movz ($ty,&BP(0,$d,$ty));
&add (&LB($x),1);
&xorb (&LB($ty),&BP(0,$in)); &xorb (&LB($ty),&BP(0,$in));
&movb (&BP(0,$out),&LB($ty)); &lea ($in,&BP(1,$in));
&inc (&LB($x)); &movz ($tx,&BP(0,$d,$x));
&inc ($in);
&inc ($out);
&cmp ($in,&swtmp(2)); &cmp ($in,&swtmp(2));
&movb (&BP(0,$out),&LB($ty));
&lea ($out,&BP(1,$out));
&jb (&label("RC4_CHAR_loop")); &jb (&label("RC4_CHAR_loop"));
&set_label("finished"); &set_label("finished");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册