提交 2354799f 编写于 作者: M Matt Oliver

Merge remote-tracking branch 'upstream/master'

#!/usr/bin/perl -w #!/usr/bin/env perl
# make_sunver.pl # make_sunver.pl
# #
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define pb_7f (~0UL / 255 * 0x7f) #define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80) #define pb_80 (~0UL / 255 * 0x80)
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w) static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w)
{ {
long i; long i;
...@@ -54,7 +54,7 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ...@@ -54,7 +54,7 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
} }
static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1, static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, int w, const uint8_t *src2, intptr_t w,
int *left, int *left_top) int *left, int *left_top)
{ {
int i; int i;
......
...@@ -25,13 +25,13 @@ typedef struct HuffYUVEncDSPContext { ...@@ -25,13 +25,13 @@ typedef struct HuffYUVEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */, void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */, const uint8_t *src1 /* align 16 */,
const uint8_t *src2 /* align 1 */, const uint8_t *src2 /* align 1 */,
int w); intptr_t w);
/** /**
* Subtract HuffYUV's variant of median prediction. * Subtract HuffYUV's variant of median prediction.
* Note, this might read from src1[-1], src2[-1]. * Note, this might read from src1[-1], src2[-1].
*/ */
void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1, void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, int w, const uint8_t *src2, intptr_t w,
int *left, int *left_top); int *left, int *left_top);
} HuffYUVEncDSPContext; } HuffYUVEncDSPContext;
......
...@@ -35,7 +35,7 @@ typedef struct MpegAudioParseContext { ...@@ -35,7 +35,7 @@ typedef struct MpegAudioParseContext {
#define MPA_HEADER_SIZE 4 #define MPA_HEADER_SIZE 4
/* header + layer + bitrate + freq + lsf/mpeg25 */ /* header + layer + freq + lsf/mpeg25 */
#define SAME_HEADER_MASK \ #define SAME_HEADER_MASK \
(0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19)) (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
......
...@@ -111,12 +111,12 @@ static int parse(AVCodecParserContext *ctx, ...@@ -111,12 +111,12 @@ static int parse(AVCodecParserContext *ctx,
while (n_frames--) { \ while (n_frames--) { \
unsigned sz = rd; \ unsigned sz = rd; \
idx += a; \ idx += a; \
if (sz > size) { \ if (sz == 0 || sz > size) { \
s->n_frames = 0; \ s->n_frames = 0; \
*out_size = size; \ *out_size = size; \
*out_data = data; \ *out_data = data; \
av_log(avctx, AV_LOG_ERROR, \ av_log(avctx, AV_LOG_ERROR, \
"Superframe packet size too big: %u > %d\n", \ "Invalid superframe packet size: %u frame size: %d\n", \
sz, size); \ sz, size); \
return full_size; \ return full_size; \
} \ } \
......
...@@ -22,6 +22,10 @@ ...@@ -22,6 +22,10 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
void ff_avg_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
...@@ -32,9 +36,9 @@ void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, ...@@ -32,9 +36,9 @@ void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
......
...@@ -29,10 +29,6 @@ ...@@ -29,10 +29,6 @@
#include "fpel.h" #include "fpel.h"
#if HAVE_YASM #if HAVE_YASM
void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
int dstStride, int src1Stride, int h); int dstStride, int src1Stride, int h);
void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
......
...@@ -31,7 +31,8 @@ ...@@ -31,7 +31,8 @@
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w) static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w)
{ {
x86_reg i = 0; x86_reg i = 0;
...@@ -57,7 +58,7 @@ static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src ...@@ -57,7 +58,7 @@ static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src
} }
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, int w, const uint8_t *src2, intptr_t w,
int *left, int *left_top) int *left, int *left_top)
{ {
x86_reg i = 0; x86_reg i = 0;
......
...@@ -63,16 +63,22 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) ...@@ -63,16 +63,22 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
ff_vc1_h_loop_filter8_sse4(src, stride, pq); ff_vc1_h_loop_filter8_sse4(src, stride, pq);
ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
} }
static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd) #define DECLARE_FUNCTION(OP, DEPTH, INSN) \
{ static void OP##vc1_mspel_mc00_##DEPTH##INSN(uint8_t *dst, \
ff_avg_pixels8_mmxext(dst, src, stride, 8); const uint8_t *src, ptrdiff_t stride, int rnd) \
} { \
static void avg_vc1_mspel_mc00_16_sse2(uint8_t *dst, const uint8_t *src, ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH); \
ptrdiff_t stride, int rnd) }
{
ff_avg_pixels16_sse2(dst, src, stride, 16); DECLARE_FUNCTION(put_, 8, _mmx)
} DECLARE_FUNCTION(put_, 16, _mmx)
DECLARE_FUNCTION(avg_, 8, _mmx)
DECLARE_FUNCTION(avg_, 16, _mmx)
DECLARE_FUNCTION(avg_, 8, _mmxext)
DECLARE_FUNCTION(avg_, 16, _mmxext)
DECLARE_FUNCTION(put_, 16, _sse2)
DECLARE_FUNCTION(avg_, 16, _sse2)
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
...@@ -109,6 +115,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) ...@@ -109,6 +115,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
#if HAVE_YASM #if HAVE_YASM
if (EXTERNAL_MMX(cpu_flags)) { if (EXTERNAL_MMX(cpu_flags)) {
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx; dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
dsp->put_vc1_mspel_pixels_tab[1][0] = put_vc1_mspel_mc00_8_mmx;
dsp->put_vc1_mspel_pixels_tab[0][0] = put_vc1_mspel_mc00_16_mmx;
dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_mmx;
dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_mmx;
} }
if (EXTERNAL_AMD3DNOW(cpu_flags)) { if (EXTERNAL_AMD3DNOW(cpu_flags)) {
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
...@@ -117,13 +128,16 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) ...@@ -117,13 +128,16 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
ASSIGN_LF(mmxext); ASSIGN_LF(mmxext);
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_mmxext; dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_mmxext;
dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_mmxext;
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2; dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2; dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2; dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2; dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
dsp->put_vc1_mspel_pixels_tab[0][0] = put_vc1_mspel_mc00_16_sse2;
dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2; dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2;
} }
if (EXTERNAL_SSSE3(cpu_flags)) { if (EXTERNAL_SSSE3(cpu_flags)) {
......
...@@ -728,39 +728,12 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize, ...@@ -728,39 +728,12 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
); );
} }
#if HAVE_MMX_EXTERNAL
/**
 * Zero-offset (mc00) "put" entry for the 8-wide VC-1 mspel table.
 * Forwards to ff_put_pixels8_mmx() with a fixed height of 8.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride line stride handed through to the pixel routine
 * @param rnd    accepted for signature compatibility; not used here
 */
static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride, int rnd)
{
    (void)rnd; /* unused in the unshifted case */
    ff_put_pixels8_mmx(dst, src, stride, 8);
}
}
/**
 * Zero-offset (mc00) "put" entry for the 16-wide VC-1 mspel table.
 * Forwards to ff_put_pixels16_mmx() with a fixed height of 16.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride line stride handed through to the pixel routine
 * @param rnd    accepted for signature compatibility; not used here
 */
static void put_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int rnd)
{
    (void)rnd; /* unused in the unshifted case */
    ff_put_pixels16_mmx(dst, src, stride, 16);
}
}
/**
 * Zero-offset (mc00) "avg" entry for the 8-wide VC-1 mspel table.
 * Forwards to ff_avg_pixels8_mmx() with a fixed height of 8.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride line stride handed through to the pixel routine
 * @param rnd    accepted for signature compatibility; not used here
 */
static void avg_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride, int rnd)
{
    (void)rnd; /* unused in the unshifted case */
    ff_avg_pixels8_mmx(dst, src, stride, 8);
}
}
/**
 * Zero-offset (mc00) "avg" entry for the 16-wide VC-1 mspel table.
 * Forwards to ff_avg_pixels16_mmx() with a fixed height of 16.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride line stride handed through to the pixel routine
 * @param rnd    accepted for signature compatibility; not used here
 */
static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int rnd)
{
    (void)rnd; /* unused in the unshifted case */
    ff_avg_pixels16_mmx(dst, src, stride, 16);
}
}
#endif
#define FN_ASSIGN(OP, X, Y, INSN) \ #define FN_ASSIGN(OP, X, Y, INSN) \
dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \ dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
{ {
#if HAVE_MMX_EXTERNAL
FN_ASSIGN(put_, 0, 0, _mmx);
FN_ASSIGN(avg_, 0, 0, _mmx);
#endif
FN_ASSIGN(put_, 0, 1, _mmx); FN_ASSIGN(put_, 0, 1, _mmx);
FN_ASSIGN(put_, 0, 2, _mmx); FN_ASSIGN(put_, 0, 2, _mmx);
FN_ASSIGN(put_, 0, 3, _mmx); FN_ASSIGN(put_, 0, 3, _mmx);
......
...@@ -34,14 +34,7 @@ ...@@ -34,14 +34,7 @@
#endif #endif
#if HAVE_FAST_CLZ #if HAVE_FAST_CLZ
#if defined( __INTEL_COMPILER ) #if AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_log2
# define ff_log2(x) (_bit_scan_reverse((x)|1))
# ifndef ff_log2_16bit
# define ff_log2_16bit av_log2
# endif
#endif /* ff_log2 */
#elif AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_log2 #ifndef ff_log2
# define ff_log2(x) (31 - __builtin_clz((x)|1)) # define ff_log2(x) (31 - __builtin_clz((x)|1))
# ifndef ff_log2_16bit # ifndef ff_log2_16bit
...@@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256]; ...@@ -55,7 +48,6 @@ extern const uint8_t ff_log2_tab[256];
#ifndef ff_log2 #ifndef ff_log2
#define ff_log2 ff_log2_c #define ff_log2 ff_log2_c
#if !defined( _MSC_VER )
static av_always_inline av_const int ff_log2_c(unsigned int v) static av_always_inline av_const int ff_log2_c(unsigned int v)
{ {
int n = 0; int n = 0;
...@@ -71,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned int v) ...@@ -71,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned int v)
return n; return n;
} }
#else
/**
 * ff_log2 fallback for MSVC, built on the _BitScanReverse intrinsic.
 *
 * @param v input value
 * @return the index written by _BitScanReverse for (v | 1)
 */
static av_always_inline av_const int ff_log2_c(unsigned int v)
{
    unsigned long highest_bit;

    /* Bit 0 is forced on so the scanned operand is never zero. */
    _BitScanReverse(&highest_bit, v|1);
    return highest_bit;
}
#define ff_log2_16bit av_log2
#endif
#endif #endif
#ifndef ff_log2_16bit #ifndef ff_log2_16bit
...@@ -106,11 +89,7 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v) ...@@ -106,11 +89,7 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
*/ */
#if HAVE_FAST_CLZ #if HAVE_FAST_CLZ
#if defined( __INTEL_COMPILER ) #if AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_ctz
#define ff_ctz(v) _bit_scan_forward(v)
#endif
#elif AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_ctz #ifndef ff_ctz
#define ff_ctz(v) __builtin_ctz(v) #define ff_ctz(v) __builtin_ctz(v)
#endif #endif
...@@ -128,7 +107,6 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v) ...@@ -128,7 +107,6 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
* @param v input value. If v is 0, the result is undefined. * @param v input value. If v is 0, the result is undefined.
* @return the number of trailing 0-bits * @return the number of trailing 0-bits
*/ */
#if !defined( _MSC_VER )
/* We use the De-Bruijn method outlined in: /* We use the De-Bruijn method outlined in:
* http://supertech.csail.mit.edu/papers/debruijn.pdf. */ * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
static av_always_inline av_const int ff_ctz_c(int v) static av_always_inline av_const int ff_ctz_c(int v)
...@@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v) ...@@ -139,14 +117,6 @@ static av_always_inline av_const int ff_ctz_c(int v)
}; };
return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27]; return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
} }
#else
/**
 * ff_ctz fallback for MSVC, built on the _BitScanForward intrinsic.
 *
 * @param v input value
 * @return the index written by _BitScanForward for v
 */
static av_always_inline av_const int ff_ctz_c(int v)
{
    unsigned long lowest_bit;

    _BitScanForward(&lowest_bit, v);
    return lowest_bit;
}
#endif
#endif #endif
#ifndef ff_ctzll #ifndef ff_ctzll
......
...@@ -24,6 +24,61 @@ ...@@ -24,6 +24,61 @@
#include <stdint.h> #include <stdint.h>
#include "config.h" #include "config.h"
#if HAVE_FAST_CLZ
#if defined(__INTEL_COMPILER)
# define ff_log2(x) (_bit_scan_reverse((x)|1))
# define ff_log2_16bit av_log2
# define ff_ctz(v) _bit_scan_forward(v)
# define ff_ctzll ff_ctzll_x86
/**
 * Intel-compiler implementation of ff_ctzll for a 64-bit operand.
 *
 * When ARCH_X86_64 is set, a single "bsfq" instruction is issued through
 * inline assembly. Otherwise the operand is treated as two 32-bit halves
 * and scanned with _bit_scan_forward().
 *
 * @param v input value
 *          NOTE(review): presumably must be non-zero -- confirm with callers
 * @return the bit index produced by the scan
 */
static av_always_inline av_const int ff_ctzll_x86(long long v)
{
# if ARCH_X86_64
    uint64_t bit_index;
    __asm__("bsfq %1,%0" : "=r" (bit_index) : "r" (v));
    return bit_index;
# else
    const uint32_t low_half = (uint32_t)v;

    /* Low half is all zero: scan the high half and offset the result by 32. */
    if (low_half == 0)
        return _bit_scan_forward((uint32_t)(v >> 32)) + 32;
    return _bit_scan_forward(low_half);
# endif
}
#elif defined(_MSC_VER)
# define ff_log2 ff_log2_x86
/**
 * MSVC implementation of ff_log2, built on the _BitScanReverse intrinsic.
 *
 * @param v input value
 * @return the index written by _BitScanReverse for (v | 1)
 */
static av_always_inline av_const int ff_log2_x86(unsigned int v)
{
    unsigned long highest_bit;

    /* Bit 0 is forced on so the scanned operand is never zero. */
    _BitScanReverse(&highest_bit, v | 1);
    return highest_bit;
}
# define ff_log2_16bit av_log2
# define ff_ctz ff_ctz_x86
/**
 * MSVC implementation of ff_ctz, built on the _BitScanForward intrinsic.
 *
 * @param v input value
 *          NOTE(review): presumably must be non-zero -- confirm with callers
 * @return the index written by _BitScanForward for v
 */
static av_always_inline av_const int ff_ctz_x86(int v)
{
    unsigned long lowest_bit;

    _BitScanForward(&lowest_bit, v);
    return lowest_bit;
}
# define ff_ctzll ff_ctzll_x86
/**
 * MSVC implementation of ff_ctzll for a 64-bit operand.
 *
 * When ARCH_X86_64 is set, the 64-bit intrinsic _BitScanForward64 is used
 * directly. Otherwise the operand is treated as two 32-bit halves and
 * scanned with _BitScanForward().
 *
 * @param v input value
 *          NOTE(review): presumably must be non-zero -- confirm with callers
 * @return the bit index produced by the scan
 */
static av_always_inline av_const int ff_ctzll_x86(long long v)
{
    unsigned long bit_index;
# if ARCH_X86_64
    _BitScanForward64(&bit_index, v);
# else
    const uint32_t low_half = (uint32_t)v;

    if (low_half != 0) {
        _BitScanForward(&bit_index, low_half);
    } else {
        /* Low half is all zero: scan the high half and offset by 32. */
        _BitScanForward(&bit_index, (uint32_t)(v >> 32));
        bit_index += 32;
    }
# endif
    return bit_index;
}
#endif /* __INTEL_COMPILER */
#endif /* HAVE_FAST_CLZ */
#if defined(__GNUC__) #if defined(__GNUC__)
/* Our generic version of av_popcount is faster than GCC's built-in on /* Our generic version of av_popcount is faster than GCC's built-in on
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册