提交 fa9b873e 编写于 作者: L Loren Merritt

clean up an ugliness introduced in r11826. this syntax will require fewer...

Clean up an ugliness introduced in r11826. This syntax will require fewer changes when adding future SSE2 code.

Originally committed as revision 11868 to svn://svn.ffmpeg.org/ffmpeg/trunk
上级 37b74c86
......@@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
}
/* general case, bilinear */
rnd_reg = rnd ? ff_pw_32 : &ff_pw_28;
rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28;
asm volatile("movd %2, %%mm4\n\t"
"movd %3, %%mm6\n\t"
"punpcklwd %%mm4, %%mm4\n\t"
......@@ -250,7 +250,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
"sub $2, %2 \n\t"
"jnz 1b \n\t"
: "+r"(dst), "+r"(src), "+r"(h)
: "r"((long)stride), "m"(*ff_pw_32), "m"(x), "m"(y)
: "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
);
}
......@@ -301,7 +301,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
"sub $1, %2\n\t"
"jnz 1b\n\t"
: "+r" (dst), "+r"(src), "+r"(h)
: "m" (*ff_pw_32), "r"((long)stride)
: "m" (ff_pw_32), "r"((long)stride)
: "%esi");
}
......
......@@ -54,7 +54,7 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pw_8 ) = 0x0008000800080008ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_16 ) = 0x0010001000100010ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
DECLARE_ALIGNED_16(const uint64_t, ff_pw_32[2]) = {0x0020002000200020ULL, 0x0020002000200020ULL};
DECLARE_ALIGNED_16(const xmm_t, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
......
......@@ -24,6 +24,8 @@
#include <stdint.h>
typedef struct { uint64_t a, b; } xmm_t;
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;
......@@ -36,7 +38,7 @@ extern const uint64_t ff_pw_8;
extern const uint64_t ff_pw_15;
extern const uint64_t ff_pw_16;
extern const uint64_t ff_pw_20;
extern const uint64_t ff_pw_32[2];
extern const xmm_t ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_64;
extern const uint64_t ff_pw_96;
......
......@@ -75,7 +75,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
"pxor %%mm7, %%mm7 \n\t"
:: "m"(*ff_pw_32));
:: "m"(ff_pw_32));
asm volatile(
STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
......@@ -294,7 +294,7 @@ static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
:"+r"(dst)
:"r"(block), "r"((long)stride), "r"(3L*stride), "m"(*ff_pw_32)
:"r"(block), "r"((long)stride), "r"(3L*stride), "m"(ff_pw_32)
);
}
......@@ -926,7 +926,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
: "S"((long)dstStride), "m"(*ff_pw_32)\
: "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
);\
}\
......@@ -1200,7 +1200,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
: "S"((long)dstStride), "m"(*ff_pw_32)\
: "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
);\
tmp += 8 - size*24;\
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册