diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 809c5d137815206432a67d8c769b3678f68dc8e7..ba59f3314f5cf9a8ed22450c957dec3fa0385cf1 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -54,26 +54,24 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len mova m3, [srcq+src3q] mova m4, [srcq+src4q] mova m5, [srcq+src5q] -%if cpuflag(sse) +%if cpuflag(sse4) SBUTTERFLYPS 0, 1, 6 SBUTTERFLYPS 2, 3, 6 SBUTTERFLYPS 4, 5, 6 - movaps m6, m4 - shufps m4, m0, q3210 + blendps m6, m4, m0, 1100b movlhps m0, m2 - movhlps m6, m2 - movaps [dstq ], m0 - movaps [dstq+16], m4 - movaps [dstq+32], m6 - - movaps m6, m5 - shufps m5, m1, q3210 + movhlps m4, m2 + blendps m2, m5, m1, 1100b movlhps m1, m3 - movhlps m6, m3 + movhlps m5, m3 + + movaps [dstq ], m0 + movaps [dstq+16], m6 + movaps [dstq+32], m4 movaps [dstq+48], m1 - movaps [dstq+64], m5 - movaps [dstq+80], m6 + movaps [dstq+64], m2 + movaps [dstq+80], m5 %else ; mmx SBUTTERFLY dq, 0, 1, 6 SBUTTERFLY dq, 2, 3, 6 @@ -100,5 +98,9 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len INIT_MMX mmx CONV_FLTP_TO_FLT_6CH -INIT_XMM sse +INIT_XMM sse4 +CONV_FLTP_TO_FLT_6CH +%if HAVE_AVX +INIT_XMM avx CONV_FLTP_TO_FLT_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6883f10a21d5f27452e84f1af2a8e569b7a230e6..206aede751cd7d924c3559c27b4864738fb7b318 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -22,8 +22,9 @@ #include "libavutil/cpu.h" #include "libavresample/audio_convert.h" -extern void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int len); -extern void ff_conv_fltp_to_flt_6ch_sse(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len); av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { @@ -34,9 +35,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } - if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE", ff_conv_fltp_to_flt_6ch_sse); + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); + } + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 55f4a936e2a868fe9a8699f4d1cc40323fa2858c..508f24e2b5bb84b5425c4e74e7be9772f767cdd0 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -42,10 +42,9 @@ %endmacro %macro SBUTTERFLYPS 3 - movaps m%3, m%1 - unpcklps m%1, m%2 - unpckhps m%3, m%2 - SWAP %2, %3 + unpcklps m%3, m%1, m%2 + unpckhps m%1, m%1, m%2 + SWAP %1, %3, %2 %endmacro %macro TRANSPOSE4x4B 5