提交 f0d616e7 编写于 作者: M Michael Niedermayer

dirac: dirac_hpel_filter_v does not currently receive aligned data, so use unaligned accesses.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
上级 49aa3974
......@@ -51,26 +51,26 @@ cglobal dirac_hpel_filter_v_%1, 4,6,8, dst, src, stride, width, src0, stridex3
pxor m7, m7
.loop:
; 7*(src[0] + src[1])
UNPACK_ADD m0, m1, [srcq], [srcq + strideq], a,a
UNPACK_ADD m0, m1, [srcq], [srcq + strideq], u,u
pmullw m0, [pw_7]
pmullw m1, [pw_7]
; 3*( ... + src[-2] + src[3])
UNPACK_ADD m2, m3, [src0q + strideq], [srcq + stridex3q], a,a
UNPACK_ADD m2, m3, [src0q + strideq], [srcq + stridex3q], u,u
paddw m0, m2
paddw m1, m3
pmullw m0, [pw_3]
pmullw m1, [pw_3]
; ... - 7*(src[-1] + src[2])
UNPACK_ADD m2, m3, [src0q + strideq*2], [srcq + strideq*2], a,a
UNPACK_ADD m2, m3, [src0q + strideq*2], [srcq + strideq*2], u,u
pmullw m2, [pw_7]
pmullw m3, [pw_7]
psubw m0, m2
psubw m1, m3
; ... - (src[-3] + src[4])
UNPACK_ADD m2, m3, [src0q], [srcq + strideq*4], a,a
UNPACK_ADD m2, m3, [src0q], [srcq + strideq*4], u,u
psubw m0, m2
psubw m1, m3
......@@ -79,7 +79,7 @@ cglobal dirac_hpel_filter_v_%1, 4,6,8, dst, src, stride, width, src0, stridex3
psraw m0, 5
psraw m1, 5
packuswb m0, m1
mova [dstq], m0
movu [dstq], m0
add dstq, mmsize
add srcq, mmsize
add src0q, mmsize
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册