提交 bd4f37f2 编写于 作者: S Shiyou Yin 提交者: Michael Niedermayer

avcodec/mips: Fix segfault in imdct36_mips_float.

'li.s' is a synthesized (pseudo) instruction; it does not work properly
when compiled with clang on MIPS, and a segfault occurs.
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
上级 1563b4b4
......@@ -293,16 +293,17 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
float phi_fract0 = phi_fract[0];
float phi_fract1 = phi_fract[1];
float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
float f1, f2, f3;
float *p_delay_end = (p_delay + (len << 1));
/* merged 2 loops */
f1 = 0.65143905753106;
f2 = 0.56471812200776;
f3 = 0.48954165955695;
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"li.s %[ag0], 0.65143905753106 \n\t"
"li.s %[ag1], 0.56471812200776 \n\t"
"li.s %[ag2], 0.48954165955695 \n\t"
"mul.s %[ag0], %[ag0], %[g_decay_slope] \n\t"
"mul.s %[ag1], %[ag1], %[g_decay_slope] \n\t"
"mul.s %[ag2], %[ag2], %[g_decay_slope] \n\t"
......@@ -378,10 +379,10 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
[temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
[temp9]"=&f"(temp9), [p_delay]"+r"(p_delay), [p_ap_delay]"+r"(p_ap_delay),
[p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out),
[ag0]"=&f"(ag0), [ag1]"=&f"(ag1), [ag2]"=&f"(ag2)
[p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out)
: [phi_fract0]"f"(phi_fract0), [phi_fract1]"f"(phi_fract1),
[p_delay_end]"r"(p_delay_end), [g_decay_slope]"f"(g_decay_slope)
[p_delay_end]"r"(p_delay_end), [g_decay_slope]"f"(g_decay_slope),
[ag0]"f"(f1), [ag1]"f"(f2), [ag2]"f"(f3)
: "memory"
);
}
......
......@@ -135,11 +135,11 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
float coeff3 = psy_fir_coeffs[7];
float coeff4 = psy_fir_coeffs[9];
float f1 = 32768.0;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"li.s $f12, 32768 \n\t"
"1: \n\t"
"lwc1 $f0, 40(%[fb]) \n\t"
"lwc1 $f1, 4(%[fb]) \n\t"
......@@ -203,14 +203,14 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
"madd.s %[sum2], %[sum2], $f9, %[coeff4] \n\t"
"madd.s %[sum4], %[sum4], $f6, %[coeff4] \n\t"
"madd.s %[sum3], %[sum3], $f3, %[coeff4] \n\t"
"mul.s %[sum1], %[sum1], $f12 \n\t"
"mul.s %[sum2], %[sum2], $f12 \n\t"
"mul.s %[sum1], %[sum1], %[f1] \n\t"
"mul.s %[sum2], %[sum2], %[f1] \n\t"
"madd.s %[sum4], %[sum4], $f11, %[coeff4] \n\t"
"madd.s %[sum3], %[sum3], $f8, %[coeff4] \n\t"
"swc1 %[sum1], 0(%[hp]) \n\t"
"swc1 %[sum2], 4(%[hp]) \n\t"
"mul.s %[sum4], %[sum4], $f12 \n\t"
"mul.s %[sum3], %[sum3], $f12 \n\t"
"mul.s %[sum4], %[sum4], %[f1] \n\t"
"mul.s %[sum3], %[sum3], %[f1] \n\t"
"swc1 %[sum4], 12(%[hp]) \n\t"
"swc1 %[sum3], 8(%[hp]) \n\t"
"bne %[fb], %[fb_end], 1b \n\t"
......@@ -223,9 +223,9 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
[fb]"+r"(fb), [hp]"+r"(hp)
: [coeff0]"f"(coeff0), [coeff1]"f"(coeff1),
[coeff2]"f"(coeff2), [coeff3]"f"(coeff3),
[coeff4]"f"(coeff4), [fb_end]"r"(fb_end)
[coeff4]"f"(coeff4), [fb_end]"r"(fb_end), [f1]"f"(f1)
: "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6",
"$f7", "$f8", "$f9", "$f10", "$f11", "$f12",
"$f7", "$f8", "$f9", "$f10", "$f11",
"memory"
);
}
......
......@@ -71,6 +71,7 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
float temp, temp1, temp3, temp4;
FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
float f1 = 0.7071067812;
num_transforms = (21845 >> (17 - s->nbits)) | 1;
......@@ -148,7 +149,6 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
"swc1 %[pom2], 4(%[tmpz]) \n\t" // tmpz[0].im = tmpz[0].im + tmp6;
"lwc1 %[pom1], 16(%[tmpz]) \n\t"
"lwc1 %[pom3], 20(%[tmpz]) \n\t"
"li.s %[pom], 0.7071067812 \n\t" // float pom = 0.7071067812f;
"add.s %[temp1],%[tmp1], %[tmp2] \n\t"
"sub.s %[temp], %[pom1], %[tmp8] \n\t"
"add.s %[pom2], %[pom3], %[tmp7] \n\t"
......@@ -159,10 +159,10 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
"add.s %[pom1], %[pom1], %[tmp8] \n\t"
"sub.s %[pom3], %[pom3], %[tmp7] \n\t"
"add.s %[tmp3], %[tmp3], %[tmp4] \n\t"
"mul.s %[tmp5], %[pom], %[temp1] \n\t" // tmp5 = pom * (tmp1 + tmp2);
"mul.s %[tmp7], %[pom], %[temp3] \n\t" // tmp7 = pom * (tmp3 - tmp4);
"mul.s %[tmp6], %[pom], %[temp4] \n\t" // tmp6 = pom * (tmp2 - tmp1);
"mul.s %[tmp8], %[pom], %[tmp3] \n\t" // tmp8 = pom * (tmp3 + tmp4);
"mul.s %[tmp5], %[f1], %[temp1] \n\t" // tmp5 = pom * (tmp1 + tmp2);
"mul.s %[tmp7], %[f1], %[temp3] \n\t" // tmp7 = pom * (tmp3 - tmp4);
"mul.s %[tmp6], %[f1], %[temp4] \n\t" // tmp6 = pom * (tmp2 - tmp1);
"mul.s %[tmp8], %[f1], %[tmp3] \n\t" // tmp8 = pom * (tmp3 + tmp4);
"swc1 %[pom1], 16(%[tmpz]) \n\t" // tmpz[2].re = tmpz[2].re + tmp8;
"swc1 %[pom3], 20(%[tmpz]) \n\t" // tmpz[2].im = tmpz[2].im - tmp7;
"add.s %[tmp1], %[tmp5], %[tmp7] \n\t" // tmp1 = tmp5 + tmp7;
......@@ -193,7 +193,7 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
[tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp7]"=&f"(tmp7),
[tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
: [tmpz]"r"(tmpz)
: [tmpz]"r"(tmpz), [f1]"f"(f1)
: "memory"
);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册