提交 bcbe9e44 编写于 作者: James Almer

x86/sbrdsp: zero extend m_max in apply_noise_main

Tested-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
上级 44028547
......@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
; apply_noise_main: loop body that walks q_filt, s_m and Y with one shared
; byte counter and accumulates into Y. Per iteration, as far as the visible
; lines show, two vectors of Y receive s_m * m0 plus the q_filt * noise-table
; products after those products have been ANDed with m6/m7.
;
; NOTE(review): this listing is a rendered diff without +/- markers. Where two
; near-identical instructions are adjacent (e.g. "shl count, 2" followed by
; "shl countd, 2"), the first appears to be the pre-commit line and the second
; its replacement. The "@@" lines are hunk headers; the source lines between
; hunks are not shown here, so some register setup (m0, m2, m6, m7) is missing
; from this view.
apply_noise_main:
; On 32-bit x86 and WIN64, m_max is copied into kxd through its m_maxm operand
; and that register then serves as the loop counter. On the other targets the
; counter is the register that already holds m_max.
; NOTE(review): going by the hunk line counts, the "%define count ..." lines
; look like the removed side and the DEFINE_ARGS lines the added side; the
; DEFINE_ARGS form is what makes the sized names countd/countq available.
%if ARCH_X86_64 == 0 || WIN64
mov kxd, m_maxm
%define count kxq
DEFINE_ARGS Y, s_m, q_filt, noise, count
%else
%define count m_maxq
DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
%endif
; presumably sign-extends the 32-bit noise argument into noiseq when the two
; names refer to different operands (x86inc helper) -- confirm against x86inc
movsxdifnidn noiseq, noised
dec noiseq
; count *= 4. The 32-bit form (countd) also clears bits 63:32 of the register
; on x86-64, which is the zero extension of m_max named in the commit title;
; the 64-bit form would keep whatever was in the upper half.
shl count, 2
shl countd, 2
%ifdef PIC
; PIC builds: materialise the address of sbr_noise_table in NOISE_TABLE
lea NOISE_TABLE, [sbr_noise_table]
%endif
; Advance the three pointers past the region to process (Y by 2*count bytes,
; s_m and q_filt by count bytes); the counter is negated below so the loop can
; index with a negative offset that counts up towards zero.
lea Yq, [Yq + 2*count]
add s_mq, count
add q_filtq, count
lea Yq, [Yq + 2*countq]
add s_mq, countq
add q_filtq, countq
; together with the dec above: noiseq = (noise - 1) * 8, a byte offset that is
; added to NOISE_TABLE in the loads below
shl noiseq, 3
pxor m5, m5 ; m5 = all zero bits
neg count
neg countq
.loop:
; one aligned vector of q_filt and two unaligned vectors from the noise table
mova m1, [q_filtq + count]
mova m1, [q_filtq + countq]
movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
add noiseq, 2*mmsize
......@@ -404,7 +404,7 @@ apply_noise_main:
; duplicate each low dword of m1 in place; the matching high-half unpack into
; m2 is presumably in the lines elided by the hunk boundary above
punpckldq m1, m1
mulps m1, m3 ; m1 = q_filt[m] * ff_sbr_noise_table[noise]
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
mova m3, [s_mq + count]
mova m3, [s_mq + countq]
; TODO: replace by a vpermd in AVX2
; m4 = high dwords of s_m each duplicated, m3 = low dwords each duplicated
punpckhdq m4, m3, m3
punpckldq m3, m3
......@@ -414,15 +414,15 @@ apply_noise_main:
mulps m4, m0 ; s_m[m] * phi_sign
; mask the noise products with m6/m7 (their setup is in lines not shown here)
pand m1, m6
pand m2, m7
; m6/m7 are reused from here on to hold two consecutive vectors of Y
movu m6, [Yq + 2*count]
movu m7, [Yq + 2*count + mmsize]
movu m6, [Yq + 2*countq]
movu m7, [Yq + 2*countq + mmsize]
; Y += (s_m * m0) + masked noise product, for both vectors
addps m3, m1
addps m4, m2
addps m6, m3
addps m7, m4
movu [Yq + 2*count], m6
movu [Yq + 2*count + mmsize], m7
add count, mmsize
movu [Yq + 2*countq], m6
movu [Yq + 2*countq + mmsize], m7
; step the negative byte counter; loop while it is still below zero
add countq, mmsize
jl .loop
RET
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册