diff --git a/compat/solaris/make_sunver.pl b/compat/solaris/make_sunver.pl
index 929bdda7b74384896ff06ad68099b04df4ba65d3..0e9ed1d351ec2ac65817609dd929100fb49e43ae 100755
--- a/compat/solaris/make_sunver.pl
+++ b/compat/solaris/make_sunver.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 
 # make_sunver.pl
 #
diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
index 95fcc19582ddf7fb5dd79abf3883c3058e67b614..fdcd0b06aa703b8474e089883c38578b870a184a 100644
--- a/libavcodec/huffyuvencdsp.c
+++ b/libavcodec/huffyuvencdsp.c
@@ -25,7 +25,7 @@
 #define pb_7f (~0UL / 255 * 0x7f)
 #define pb_80 (~0UL / 255 * 0x80)
 
-static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
+static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w)
 {
     long i;
 
@@ -54,7 +54,7 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 }
 
 static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
-                                   const uint8_t *src2, int w,
+                                   const uint8_t *src2, intptr_t w,
                                    int *left, int *left_top)
 {
     int i;
diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
index 3a49b4a7efa11855f96ea9a71ffb12c2f041ab66..9d090953747992414155224059ba204a8432d420 100644
--- a/libavcodec/huffyuvencdsp.h
+++ b/libavcodec/huffyuvencdsp.h
@@ -25,13 +25,13 @@ typedef struct HuffYUVEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
                        const uint8_t *src1 /* align 16 */,
                        const uint8_t *src2 /* align 1 */,
-                       int w);
+                       intptr_t w);
     /**
      * Subtract HuffYUV's variant of median prediction.
      * Note, this might read from src1[-1], src2[-1].
      */
     void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
-                                 const uint8_t *src2, int w,
+                                 const uint8_t *src2, intptr_t w,
                                  int *left, int *left_top);
 } HuffYUVEncDSPContext;
 
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 7849fed649b74c996326b15a78097591e9809a01..4c72131bd434fd9b55fb981a5768cf7332ad622d 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -35,7 +35,7 @@ typedef struct MpegAudioParseContext {
 
 #define MPA_HEADER_SIZE 4
 
-/* header + layer + bitrate + freq + lsf/mpeg25 */
+/* sync word + layer + sample rate + lsf/mpeg25 (bitrate bits are not compared) */
 #define SAME_HEADER_MASK \
    (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
 
diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
index f1f7e350d26b14192219ead64dd7c7664c149c70..0437097391d5bfbff80823289fe3ab1463d42b62 100644
--- a/libavcodec/vp9_parser.c
+++ b/libavcodec/vp9_parser.c
@@ -111,12 +111,12 @@ static int parse(AVCodecParserContext *ctx,
                 while (n_frames--) { \
                     unsigned sz = rd; \
                     idx += a; \
-                    if (sz > size) { \
+                    if (sz == 0 || sz > size) { \
                         s->n_frames = 0; \
                         *out_size = size; \
                         *out_data = data; \
                         av_log(avctx, AV_LOG_ERROR, \
-                               "Superframe packet size too big: %u > %d\n", \
+                               "Invalid superframe packet size: %u frame size: %d\n", \
                                sz, size); \
                         return full_size; \
                     } \
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index 4d93959a96107a92c46ad84a6c4c8bacb37d430a..4e83cf71c30e5804353440fd7c6fe00162679efa 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -22,6 +22,10 @@
 #include <stddef.h>
 #include <stdint.h>
 
+void ff_avg_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
+void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h);
 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -32,10 +36,10 @@ void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
+void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
-void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index d9cb5f264cf170d913cfb2e4444fd5b5b6075db0..d759e888a591094ba055ab02fe82b3916fecce11 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -29,10 +29,6 @@
 #include "fpel.h"
 
 #if HAVE_YASM
-void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
-                        ptrdiff_t line_size, int h);
-void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
 void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
 void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
index 63d8e3cc732b04b81baa801f383c31721067c392..ee60f4c5739d06a27d574f7fd9a2950bbc667eec 100644
--- a/libavcodec/x86/huffyuvencdsp_mmx.c
+++ b/libavcodec/x86/huffyuvencdsp_mmx.c
@@ -31,7 +31,8 @@
 
 #if HAVE_INLINE_ASM
 
-static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
+static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+                           intptr_t w)
 {
     x86_reg i = 0;
 
@@ -57,7 +58,7 @@ static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src
 }
 
 static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
-                                        const uint8_t *src2, int w,
+                                        const uint8_t *src2, intptr_t w,
                                         int *left, int *left_top)
 {
     x86_reg i = 0;
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 2bef5f5fb59341549d42a7a39b4f5b2c9561c7c0..8c5fc209849592b8bbf431986d7fd01a4ca4131b 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -63,16 +63,22 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
     ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
     ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
 }
-static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels8_mmxext(dst, src, stride, 8);
-}
-static void avg_vc1_mspel_mc00_16_sse2(uint8_t *dst, const uint8_t *src,
-                                       ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels16_sse2(dst, src, stride, 16);
-}
+/* Emits static <OP>vc1_mspel_mc00_<DEPTH><INSN>(), forwarding to ff_<OP>pixels<DEPTH><INSN>() with h = DEPTH; rnd is unused. */
+#define DECLARE_FUNCTION(OP, DEPTH, INSN)                       \
+    static void OP##vc1_mspel_mc00_##DEPTH##INSN(uint8_t *dst,          \
+                             const uint8_t *src, ptrdiff_t stride, int rnd) \
+    {                                                                       \
+        ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH);     \
+    }
+
+DECLARE_FUNCTION(put_,  8, _mmx)    /* -> put_vc1_mspel_mc00_8_mmx     */
+DECLARE_FUNCTION(put_, 16, _mmx)    /* -> put_vc1_mspel_mc00_16_mmx    */
+DECLARE_FUNCTION(avg_,  8, _mmx)    /* -> avg_vc1_mspel_mc00_8_mmx     */
+DECLARE_FUNCTION(avg_, 16, _mmx)    /* -> avg_vc1_mspel_mc00_16_mmx    */
+DECLARE_FUNCTION(avg_,  8, _mmxext) /* -> avg_vc1_mspel_mc00_8_mmxext  */
+DECLARE_FUNCTION(avg_, 16, _mmxext) /* -> avg_vc1_mspel_mc00_16_mmxext */
+DECLARE_FUNCTION(put_, 16, _sse2)   /* -> put_vc1_mspel_mc00_16_sse2   */
+DECLARE_FUNCTION(avg_, 16, _sse2)   /* -> avg_vc1_mspel_mc00_16_sse2   */
 
 #endif /* HAVE_YASM */
 
@@ -109,6 +115,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 #if HAVE_YASM
     if (EXTERNAL_MMX(cpu_flags)) {
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
+
+        dsp->put_vc1_mspel_pixels_tab[1][0]      = put_vc1_mspel_mc00_8_mmx;
+        dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_mmx;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmx;
+        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmx;
     }
     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
@@ -117,13 +128,16 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         ASSIGN_LF(mmxext);
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
-        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmxext;
     }
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
         dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
+
+        dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_sse2;
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index a7eb59df479adffc153b9c2a3a2d2431068bd3a0..e42099b46c292d0998e8bf6641de24e926012621 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -728,39 +728,12 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
     );
 }
 
-#if HAVE_MMX_EXTERNAL
-static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
-                                   ptrdiff_t stride, int rnd)
-{
-    ff_put_pixels8_mmx(dst, src, stride, 8);
-}
-static void put_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_put_pixels16_mmx(dst, src, stride, 16);
-}
-static void avg_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
-                                   ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels8_mmx(dst, src, stride, 8);
-}
-static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels16_mmx(dst, src, stride, 16);
-}
-#endif
-
 #define FN_ASSIGN(OP, X, Y, INSN) \
     dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
     dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
 
 av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
-#if HAVE_MMX_EXTERNAL
-    FN_ASSIGN(put_, 0, 0, _mmx);
-    FN_ASSIGN(avg_, 0, 0, _mmx);
-#endif
     FN_ASSIGN(put_, 0, 1, _mmx);
     FN_ASSIGN(put_, 0, 2, _mmx);
     FN_ASSIGN(put_, 0, 3, _mmx);
diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index 5a551237372500b07b3e70ca0a6b962de2fd0b4b..5a10d025dba05f6344b42174cb28a3607cb7b8ca 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -34,14 +34,7 @@
 #endif
 
 #if HAVE_FAST_CLZ
-#if defined( __INTEL_COMPILER )
-#ifndef ff_log2
-#   define ff_log2(x) (_bit_scan_reverse((x)|1))
-#   ifndef ff_log2_16bit
-#      define ff_log2_16bit av_log2
-#   endif
-#endif /* ff_log2 */
-#elif AV_GCC_VERSION_AT_LEAST(3,4)
+#if AV_GCC_VERSION_AT_LEAST(3,4)
 #ifndef ff_log2
 #   define ff_log2(x) (31 - __builtin_clz((x)|1))
 #   ifndef ff_log2_16bit
@@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256];
 
 #ifndef ff_log2
 #define ff_log2 ff_log2_c
-#if !defined( _MSC_VER )
 static av_always_inline av_const int ff_log2_c(unsigned int v)
 {
     int n = 0;
@@ -71,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned int v)
 
     return n;
 }
-#else
-static av_always_inline av_const int ff_log2_c(unsigned int v)
-{
-    unsigned long n;
-    _BitScanReverse(&n, v|1);
-    return n;
-}
-#define ff_log2_16bit av_log2
-#endif
 #endif
 
 #ifndef ff_log2_16bit
@@ -106,11 +89,7 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
  */
 
 #if HAVE_FAST_CLZ
-#if defined( __INTEL_COMPILER )
-#ifndef ff_ctz
-#define ff_ctz(v) _bit_scan_forward(v)
-#endif
-#elif AV_GCC_VERSION_AT_LEAST(3,4)
+#if AV_GCC_VERSION_AT_LEAST(3,4)
 #ifndef ff_ctz
 #define ff_ctz(v) __builtin_ctz(v)
 #endif
@@ -128,7 +107,6 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
  * @param v  input value. If v is 0, the result is undefined.
  * @return   the number of trailing 0-bits
  */
-#if !defined( _MSC_VER )
 /* We use the De-Bruijn method outlined in:
  * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
 static av_always_inline av_const int ff_ctz_c(int v)
@@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v)
     };
     return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
 }
-#else
-static av_always_inline av_const int ff_ctz_c( int v )
-{
-    unsigned long c;
-    _BitScanForward(&c, v);
-    return c;
-}
-#endif
 #endif
 
 #ifndef ff_ctzll
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index fefad205e630a84944b545ebb1be3f37905c462c..7881e3c7b6399d103f6c1780cf971521e9760be4 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -24,6 +24,61 @@
 #include <stdint.h>
 #include "config.h"
 
+#if HAVE_FAST_CLZ
+#if defined(__INTEL_COMPILER)
+#   define ff_log2(x) (_bit_scan_reverse((x)|1)) /* "|1" keeps the scanned operand non-zero */
+#   define ff_log2_16bit av_log2 /* the 16-bit variant simply maps to av_log2 */
+
+#   define ff_ctz(v) _bit_scan_forward(v) /* v is passed through unguarded, zero included */
+
+#   define ff_ctzll ff_ctzll_x86 /* Intel compiler: trailing-zero count of a 64-bit value */
+static av_always_inline av_const int ff_ctzll_x86(long long v)
+{
+#   if ARCH_X86_64
+    uint64_t c; /* output operand of the asm below */
+    __asm__("bsfq %1,%0" : "=r" (c) : "r" (v)); /* bit-scan-forward on the full 64-bit operand; v is not checked for 0 */
+    return c;
+#   else /* not x86-64: scan the low 32 bits, or the high 32 bits (+32) when the low half is 0 */
+    return ((uint32_t)v == 0) ? _bit_scan_forward((uint32_t)(v >> 32)) + 32 : _bit_scan_forward((uint32_t)v);
+#   endif
+}
+#elif defined(_MSC_VER)
+#   define ff_log2 ff_log2_x86 /* MSVC: index of the highest set bit of v (0 for v == 0) */
+static av_always_inline av_const int ff_log2_x86(unsigned int v) {
+    unsigned long n; /* receives the bit index from _BitScanReverse */
+    _BitScanReverse(&n, v | 1); /* "| 1" keeps the operand non-zero, so the scan always finds a bit */
+    return n;
+}
+#   define ff_log2_16bit av_log2 /* the 16-bit variant simply maps to av_log2 */
+
+#   define ff_ctz ff_ctz_x86 /* MSVC: index of the lowest set bit of a 32-bit value */
+static av_always_inline av_const int ff_ctz_x86(int v) {
+    unsigned long c; /* not initialized here; only the intrinsic writes it */
+    _BitScanForward(&c, v); /* v is not guarded: per the intrinsic's docs the index is undefined for v == 0 */
+    return c;
+}
+
+#   define ff_ctzll ff_ctzll_x86 /* MSVC: index of the lowest set bit of a 64-bit value */
+static av_always_inline av_const int ff_ctzll_x86(long long v)
+{
+    unsigned long c; /* not initialized here; only the intrinsics below write it */
+#   if ARCH_X86_64
+    _BitScanForward64(&c, v); /* one scan over all 64 bits */
+#   else /* not x86-64: scan the two 32-bit halves separately */
+    if ((uint32_t)v == 0) { /* low half has no set bit, so look in the high half */
+        _BitScanForward(&c, (uint32_t)(v >> 32));
+        c += 32; /* rebase the high-half index onto the full 64-bit value */
+    } else {
+        _BitScanForward(&c, (uint32_t)v);
+    }
+#   endif
+    return c;
+}
+
+#endif /* __INTEL_COMPILER */
+
+#endif /* HAVE_FAST_CLZ */
+
 #if defined(__GNUC__)
 
 /* Our generic version of av_popcount is faster than GCC's built-in on