avcodec/mips: Fix segfault in imdct36_mips_float.

'li.s' is a synthesized instruction, it does not work properly when compiled with clang on mips, and A segfault occurred. Signed-off-by: N Michael Niedermayer <michael@niedermayer.cc>

avcodec/mips: Fix segfault in imdct36_mips_float.
'li.s' is a synthesized instruction, it does not work properly when compiled with clang on mips, and A segfault occurred. Signed-off-by: N Michael Niedermayer <michael@niedermayer.cc>
bd4f37f2 · Shiyou Yin · Michael Niedermayer · 1563b4b4 · bd4f37f2 · bd4f37f2
4 changed file
--- a/libavcodec/mips/aacpsdsp_mips.c
+++ b/libavcodec/mips/aacpsdsp_mips.c
@@ -293,16 +293,17 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
    float phi_fract0 = phi_fract[0];
    float phi_fract1 = phi_fract[1];
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    float f1, f2, f3;

    float *p_delay_end = (p_delay + (len << 1));

    /* merged 2 loops */
+    f1 = 0.65143905753106;
+    f2 = 0.56471812200776;
+    f3 = 0.48954165955695;
    __asm__ volatile(
        ".set    push                                                    \n\t"
        ".set    noreorder                                               \n\t"
-        "li.s    %[ag0],        0.65143905753106                         \n\t"
-        "li.s    %[ag1],        0.56471812200776                         \n\t"
-        "li.s    %[ag2],        0.48954165955695                         \n\t"
        "mul.s   %[ag0],        %[ag0],        %[g_decay_slope]          \n\t"
        "mul.s   %[ag1],        %[ag1],        %[g_decay_slope]          \n\t"
        "mul.s   %[ag2],        %[ag2],        %[g_decay_slope]          \n\t"
@@ -378,10 +379,10 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
          [temp9]"=&f"(temp9), [p_delay]"+r"(p_delay), [p_ap_delay]"+r"(p_ap_delay),
-          [p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out),
-          [ag0]"=&f"(ag0), [ag1]"=&f"(ag1), [ag2]"=&f"(ag2)
+          [p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out)
        : [phi_fract0]"f"(phi_fract0), [phi_fract1]"f"(phi_fract1),
-          [p_delay_end]"r"(p_delay_end), [g_decay_slope]"f"(g_decay_slope)
+          [p_delay_end]"r"(p_delay_end), [g_decay_slope]"f"(g_decay_slope),
+          [ag0]"f"(f1), [ag1]"f"(f2), [ag2]"f"(f3)
        : "memory"
    );
 }

--- a/libavcodec/mips/aacpsy_mips.h
+++ b/libavcodec/mips/aacpsy_mips.h
@@ -135,11 +135,11 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
    float coeff3 = psy_fir_coeffs[7];
    float coeff4 = psy_fir_coeffs[9];

+    float f1 = 32768.0;
    __asm__ volatile (
        ".set push                                          \n\t"
        ".set noreorder                                     \n\t"

-        "li.s   $f12,       32768                           \n\t"
        "1:                                                 \n\t"
        "lwc1   $f0,        40(%[fb])                       \n\t"
        "lwc1   $f1,        4(%[fb])                        \n\t"
@@ -203,14 +203,14 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
        "madd.s %[sum2],    %[sum2],    $f9,    %[coeff4]   \n\t"
        "madd.s %[sum4],    %[sum4],    $f6,    %[coeff4]   \n\t"
        "madd.s %[sum3],    %[sum3],    $f3,    %[coeff4]   \n\t"
-        "mul.s  %[sum1],    %[sum1],    $f12                \n\t"
-        "mul.s  %[sum2],    %[sum2],    $f12                \n\t"
+        "mul.s  %[sum1],    %[sum1],    %[f1]               \n\t"
+        "mul.s  %[sum2],    %[sum2],    %[f1]               \n\t"
        "madd.s %[sum4],    %[sum4],    $f11,   %[coeff4]   \n\t"
        "madd.s %[sum3],    %[sum3],    $f8,    %[coeff4]   \n\t"
        "swc1   %[sum1],    0(%[hp])                        \n\t"
        "swc1   %[sum2],    4(%[hp])                        \n\t"
-        "mul.s  %[sum4],    %[sum4],    $f12                \n\t"
-        "mul.s  %[sum3],    %[sum3],    $f12                \n\t"
+        "mul.s  %[sum4],    %[sum4],    %[f1]               \n\t"
+        "mul.s  %[sum3],    %[sum3],    %[f1]               \n\t"
        "swc1   %[sum4],    12(%[hp])                       \n\t"
        "swc1   %[sum3],    8(%[hp])                        \n\t"
        "bne    %[fb],      %[fb_end],  1b                  \n\t"
@@ -223,9 +223,9 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
          [fb]"+r"(fb), [hp]"+r"(hp)
        : [coeff0]"f"(coeff0), [coeff1]"f"(coeff1),
          [coeff2]"f"(coeff2), [coeff3]"f"(coeff3),
-          [coeff4]"f"(coeff4), [fb_end]"r"(fb_end)
+          [coeff4]"f"(coeff4), [fb_end]"r"(fb_end), [f1]"f"(f1)
        : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6",
-          "$f7", "$f8", "$f9", "$f10", "$f11", "$f12",
+          "$f7", "$f8", "$f9", "$f10", "$f11",
          "memory"
    );
 }

--- a/libavcodec/mips/fft_mips.c
+++ b/libavcodec/mips/fft_mips.c
@@ -71,6 +71,7 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
    float temp, temp1, temp3, temp4;
    FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
    FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
+    float f1 = 0.7071067812;

    num_transforms = (21845 >> (17 - s->nbits)) | 1;

@@ -148,7 +149,6 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
            "swc1  %[pom2], 4(%[tmpz])                      \n\t"  // tmpz[0].im = tmpz[0].im + tmp6;
            "lwc1  %[pom1], 16(%[tmpz])                     \n\t"
            "lwc1  %[pom3], 20(%[tmpz])                     \n\t"
-            "li.s  %[pom],  0.7071067812                    \n\t"  // float pom = 0.7071067812f;
            "add.s %[temp1],%[tmp1],    %[tmp2]             \n\t"
            "sub.s %[temp], %[pom1],    %[tmp8]             \n\t"
            "add.s %[pom2], %[pom3],    %[tmp7]             \n\t"
@@ -159,10 +159,10 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
            "add.s %[pom1], %[pom1],    %[tmp8]             \n\t"
            "sub.s %[pom3], %[pom3],    %[tmp7]             \n\t"
            "add.s %[tmp3], %[tmp3],    %[tmp4]             \n\t"
-            "mul.s %[tmp5], %[pom],     %[temp1]            \n\t"  // tmp5 = pom * (tmp1 + tmp2);
-            "mul.s %[tmp7], %[pom],     %[temp3]            \n\t"  // tmp7 = pom * (tmp3 - tmp4);
-            "mul.s %[tmp6], %[pom],     %[temp4]            \n\t"  // tmp6 = pom * (tmp2 - tmp1);
-            "mul.s %[tmp8], %[pom],     %[tmp3]             \n\t"  // tmp8 = pom * (tmp3 + tmp4);
+            "mul.s %[tmp5], %[f1],      %[temp1]            \n\t"  // tmp5 = pom * (tmp1 + tmp2);
+            "mul.s %[tmp7], %[f1],      %[temp3]            \n\t"  // tmp7 = pom * (tmp3 - tmp4);
+            "mul.s %[tmp6], %[f1],      %[temp4]            \n\t"  // tmp6 = pom * (tmp2 - tmp1);
+            "mul.s %[tmp8], %[f1],      %[tmp3]             \n\t"  // tmp8 = pom * (tmp3 + tmp4);
            "swc1  %[pom1], 16(%[tmpz])                     \n\t"  // tmpz[2].re = tmpz[2].re + tmp8;
            "swc1  %[pom3], 20(%[tmpz])                     \n\t"  // tmpz[2].im = tmpz[2].im - tmp7;
            "add.s %[tmp1], %[tmp5],    %[tmp7]             \n\t"  // tmp1 = tmp5 + tmp7;
@@ -193,7 +193,7 @@ static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
              [tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5),  [tmp7]"=&f"(tmp7),
              [tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
-            : [tmpz]"r"(tmpz)
+            : [tmpz]"r"(tmpz), [f1]"f"(f1)
            : "memory"
        );
    }

--- a/libavcodec/mips/mpegaudiodsp_mips_float.c
+++ b/libavcodec/mips/mpegaudiodsp_mips_float.c