Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
3b15a6d7
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3b15a6d7
编写于
1月 23, 2012
作者:
R
Ronald S. Bultje
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
config.asm: change %ifdef directives to %if directives.
This allows combining multiple conditionals in a single statement.
上级
08628b6a
变更
27
隐藏空白更改
内联
并排
Showing
27 changed file
with
208 addition
and
205 deletion
+208
-205
configure
configure
+2
-1
libavcodec/x86/ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+5
-5
libavcodec/x86/dct32_sse.asm
libavcodec/x86/dct32_sse.asm
+2
-2
libavcodec/x86/dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+27
-27
libavcodec/x86/dsputilenc_yasm.asm
libavcodec/x86/dsputilenc_yasm.asm
+2
-2
libavcodec/x86/fft_mmx.asm
libavcodec/x86/fft_mmx.asm
+14
-14
libavcodec/x86/fmtconvert.asm
libavcodec/x86/fmtconvert.asm
+5
-5
libavcodec/x86/h264_chromamc.asm
libavcodec/x86/h264_chromamc.asm
+8
-8
libavcodec/x86/h264_chromamc_10bit.asm
libavcodec/x86/h264_chromamc_10bit.asm
+2
-2
libavcodec/x86/h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+15
-15
libavcodec/x86/h264_deblock_10bit.asm
libavcodec/x86/h264_deblock_10bit.asm
+6
-6
libavcodec/x86/h264_idct.asm
libavcodec/x86/h264_idct.asm
+30
-30
libavcodec/x86/h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+13
-13
libavcodec/x86/h264_intrapred.asm
libavcodec/x86/h264_intrapred.asm
+10
-10
libavcodec/x86/h264_intrapred_10bit.asm
libavcodec/x86/h264_intrapred_10bit.asm
+13
-13
libavcodec/x86/h264_qpel_10bit.asm
libavcodec/x86/h264_qpel_10bit.asm
+4
-4
libavcodec/x86/h264_weight.asm
libavcodec/x86/h264_weight.asm
+2
-2
libavcodec/x86/h264_weight_10bit.asm
libavcodec/x86/h264_weight_10bit.asm
+1
-1
libavcodec/x86/imdct36_sse.asm
libavcodec/x86/imdct36_sse.asm
+4
-4
libavcodec/x86/proresdsp.asm
libavcodec/x86/proresdsp.asm
+1
-1
libavcodec/x86/vp3dsp.asm
libavcodec/x86/vp3dsp.asm
+9
-9
libavcodec/x86/vp56dsp.asm
libavcodec/x86/vp56dsp.asm
+1
-1
libavutil/x86/x86inc.asm
libavutil/x86/x86inc.asm
+15
-13
libavutil/x86/x86util.asm
libavutil/x86/x86util.asm
+1
-1
libswscale/x86/input.asm
libswscale/x86/input.asm
+4
-4
libswscale/x86/output.asm
libswscale/x86/output.asm
+8
-8
libswscale/x86/scale.asm
libswscale/x86/scale.asm
+4
-4
未找到文件。
configure
浏览文件 @
3b15a6d7
...
...
@@ -541,7 +541,8 @@ print_config_mak(){
}
print_config_asm
(){
enabled
$1
&&
echo
"%define
$2
"
enabled
$1
&&
v
=
1
||
v
=
0
echo
"%define
$2
$v
"
}
print_config
(){
...
...
libavcodec/x86/ac3dsp.asm
浏览文件 @
3b15a6d7
...
...
@@ -69,12 +69,12 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
%define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN
mmx
%if
def
HAVE_MMX2
%if HAVE_MMX2
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN
mmxext
%endif
%if
def
HAVE_SSE
%if HAVE_SSE
INIT_XMM
AC3_EXPONENT_MIN
ss
e2
%endif
...
...
@@ -367,7 +367,7 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
pabsd
%
1
,
%
1
%endmacro
%if
def
HAVE_AMD3DNOW
%if HAVE_AMD3DNOW
INIT_MMX
cglobal
ac3_extract_exponents_3dnow
,
3
,
3
,
0
,
exp
,
coef
,
len
add
expq
,
lenq
...
...
@@ -439,11 +439,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
REP_RET
%endmacro
%if
def
HAVE_SSE
%if HAVE_SSE
INIT_XMM
%define PABSD PABSD_MMX
AC3_EXTRACT_EXPONENTS
ss
e2
%if
def
HAVE_SSSE3
%if HAVE_SSSE3
%define PABSD PABSD_SSSE3
AC3_EXTRACT_EXPONENTS
ss
se3
%endif
...
...
libavcodec/x86/dct32_sse.asm
浏览文件 @
3b15a6d7
...
...
@@ -211,7 +211,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
INIT_YMM
SECTION
_TEXT
%if
def
HAVE_AVX
%if HAVE_AVX
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
cglobal
dct32_float_avx
,
2
,
3
,
8
,
out
,
in
,
tmp
; pass 1
...
...
@@ -289,7 +289,7 @@ INIT_XMM
%define BUTTERFLY BUTTERFLY_SSE
%define BUTTERFLY0 BUTTERFLY0_SSE
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
...
...
libavcodec/x86/dsputil_yasm.asm
浏览文件 @
3b15a6d7
...
...
@@ -138,7 +138,7 @@ align 16
%endif
%define t0 [v1q + orderq]
%define t1 [v1q + orderq + mmsize]
%if
def
ARCH_X86_64
%if ARCH_X86_64
mova
m8
,
t0
mova
m9
,
t1
%define t0 m8
...
...
@@ -474,7 +474,7 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
movss
xmm1
,
xmm0
shufps
xmm0
,
xmm0
,
1
addss
xmm0
,
xmm1
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
movd
r0m
,
xmm0
fld
dword
r0m
%endif
...
...
@@ -498,7 +498,7 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
; function implementations. Fast are fixed-width, slow is variable-width
%macro EMU_EDGE_FUNC 0
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define w_reg r10
cglobal
emu_edge_core
,
6
,
7
,
1
mov
r11
,
r5
; save block_h
...
...
@@ -513,14 +513,14 @@ cglobal emu_edge_core, 2, 7, 0
mov
w_reg
,
r7m
sub
w_reg
,
r6m
; w = start_x - end_x
sub
r5
,
r4
%if
def
ARCH_X86_64
%if ARCH_X86_64
sub
r4
,
r3
%else
sub
r4
,
dword
r3m
%endif
cmp
w_reg
,
22
jg
.slow_v_extend_loop
%if
def
ARCH_X86_32
%if ARCH_X86_32
mov
r2
,
r2m
; linesize
%endif
sal
w_reg
,
7
; w * 128
...
...
@@ -536,7 +536,7 @@ cglobal emu_edge_core, 2, 7, 0
; horizontal extend (left/right)
mov
w_reg
,
r6m
; start_x
sub
r0
,
w_reg
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r3
,
r0
; backup of buf+block_h*linesize
mov
r5
,
r11
%else
...
...
@@ -564,7 +564,7 @@ cglobal emu_edge_core, 2, 7, 0
; now r3(64)/r0(32)=buf,r2=linesize,r11/r5=block_h,r6/r3=val, r10/r6=end_x, r1=block_w
.right_extend:
%if
def
ARCH_X86_32
%if ARCH_X86_32
mov
r0
,
r0m
mov
r5
,
r5m
%endif
...
...
@@ -589,13 +589,13 @@ cglobal emu_edge_core, 2, 7, 0
.h_extend_end:
RET
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define vall al
%define valh ah
%define valw ax
%define valw2 r10w
%define valw3 r3w
%if
def
WIN64
%if WIN64
%define valw4 r4w
%else
; unix64
%define valw4 r3w
...
...
@@ -643,7 +643,7 @@ cglobal emu_edge_core, 2, 7, 0
%endrep
; %2/16
%endif
%if
def
ARCH_X86_64
%if ARCH_X86_64
%if (%2-%%src_off) == 8
mov
rax
,
[
r1
+%%
src_off
]
%assign %%src_off %%src_off+8
...
...
@@ -692,7 +692,7 @@ cglobal emu_edge_core, 2, 7, 0
%endrep
; %2/16
%endif
%if
def
ARCH_X86_64
%if ARCH_X86_64
%if (%2-%%dst_off) == 8
mov
[
r0
+%%
ds
t_off
],
rax
%assign %%dst_off %%dst_off+8
...
...
@@ -740,7 +740,7 @@ cglobal emu_edge_core, 2, 7, 0
ALIGN
128
.emuedge_v_extend_
%+
%%
n
:
; extend pixels above body
%if
def
ARCH_X86_64
%if ARCH_X86_64
test
r3
,
r3
; if (!start_y)
jz
.emuedge_copy_body_
%+
%%
n
%+
_loop
; goto body
%else
; ARCH_X86_32
...
...
@@ -751,7 +751,7 @@ ALIGN 128
.emuedge_extend_top_
%+
%%
n
%+
_loop
:
; do {
WRITE_NUM_BYTES
top
,
%%
n
; write bytes
add
r0
,
r2
; dst += linesize
%if
def
ARCH_X86_64
%if ARCH_X86_64
dec
r3d
%else
; ARCH_X86_32
dec
dword
r3m
...
...
@@ -779,7 +779,7 @@ ALIGN 128
jnz
.emuedge_extend_bottom_
%+
%%
n
%+
_loop
; } while (--block_h)
.emuedge_v_extend_end_
%+
%%
n
:
%if
def
ARCH_X86_64
%if ARCH_X86_64
ret
%else
; ARCH_X86_32
rep
ret
...
...
@@ -841,7 +841,7 @@ ALIGN 64
WRITE_V_PIXEL
%%
n
,
r0
; write pixels
dec
r5
jnz
.emuedge_extend_left_
%+
%%
n
; } while (--block_h)
%if
def
ARCH_X86_64
%if ARCH_X86_64
ret
%else
; ARCH_X86_32
rep
ret
...
...
@@ -856,7 +856,7 @@ ALIGN 64
%rep 11
ALIGN
64
.emuedge_extend_right_
%+
%%
n
:
; do {
%if
def
ARCH_X86_64
%if ARCH_X86_64
sub
r3
,
r2
; dst -= linesize
READ_V_PIXEL
%%
n
,
[
r3
+
w_reg
-
1
]
; read pixels
WRITE_V_PIXEL
%%
n
,
r3
+
r4
-%%
n
; write pixels
...
...
@@ -868,7 +868,7 @@ ALIGN 64
dec
r5
%endif
; ARCH_X86_64/32
jnz
.emuedge_extend_right_
%+
%%
n
; } while (--block_h)
%if
def
ARCH_X86_64
%if ARCH_X86_64
ret
%else
; ARCH_X86_32
rep
ret
...
...
@@ -876,7 +876,7 @@ ALIGN 64
%assign %%n %%n+2
%endrep
%if
def
ARCH_X86_32
%if ARCH_X86_32
%define stack_offset 0x10
%endif
%endmacro
; RIGHT_EXTEND
...
...
@@ -916,7 +916,7 @@ ALIGN 64
V_COPY_NPX
%
1
,
mm0
,
movq
,
8
,
0xFFFFFFF8
%else
; sse
V_COPY_NPX
%
1
,
xmm0
,
movups
,
16
,
0xFFFFFFF0
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define linesize r2
V_COPY_NPX
%
1
,
rax
,
mov
,
8
%else
; ARCH_X86_32
...
...
@@ -940,7 +940,7 @@ ALIGN 64
.slow_v_extend_loop:
; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h
; r11(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r10(64)/r6(32)=w=end_x-start_x
%if
def
ARCH_X86_64
%if ARCH_X86_64
push
r11
; save old value of block_h
test
r3
,
r3
%define cnt_reg r11
...
...
@@ -956,18 +956,18 @@ ALIGN 64
.do_body_copy:
V_COPY_ROW
body
,
r4
%if
def
ARCH_X86_64
%if ARCH_X86_64
pop
r11
; restore old value of block_h
%define cnt_reg r3
%endif
test
r5
,
r5
%if
def
ARCH_X86_64
%if ARCH_X86_64
jz
.v_extend_end
%else
jz
.skip_bottom_extend
%endif
V_COPY_ROW
bottom
,
r5
%if
def
ARCH_X86_32
%if ARCH_X86_32
.skip_bottom_extend:
mov
r2
,
r2m
%endif
...
...
@@ -996,7 +996,7 @@ ALIGN 64
.left_extend_loop_end:
dec
r5
jnz
.slow_left_extend_loop
%if
def
ARCH_X86_32
%if ARCH_X86_32
mov
r2
,
r2m
%endif
jmp
.right_extend
...
...
@@ -1006,7 +1006,7 @@ ALIGN 64
.slow_right_extend_loop:
; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r11(64)/r5(32)=block_h,
; r10(64)/r6(32)=end_x,r6/r3=val,r1=cntr
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define buf_reg r3
%define bh_reg r11
%else
...
...
@@ -1047,7 +1047,7 @@ SLOW_RIGHT_EXTEND
%endmacro
emu_edge
ss
e
%if
def
ARCH_X86_32
%if ARCH_X86_32
emu_edge
mmx
%endif
...
...
@@ -1138,7 +1138,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
%macro BUTTERFLIES_FLOAT_INTERLEAVE 0
cglobal
butterflies_float_interleave
,
4
,
4
,
3
,
ds
t
,
src0
,
src1
,
len
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
lenq
,
lend
%endif
test
lenq
,
lenq
...
...
libavcodec/x86/dsputilenc_yasm.asm
浏览文件 @
3b15a6d7
...
...
@@ -245,7 +245,7 @@ hadamard8x8_diff_%1:
lea
r0
,
[
r3
*
3
]
DIFF_PIXELS_8
r1
,
r2
,
0
,
r3
,
r0
,
rsp
+
gprsize
HADAMARD8
%if
def
ARCH_X86_64
%if ARCH_X86_64
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
%else
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
[
rsp
+
gprsize
],
[
rsp
+
mmsize
+
gprsize
]
...
...
@@ -270,7 +270,7 @@ HADAMARD8_DIFF_MMX mmx2
INIT_XMM
%define ABS2 ABS2_MMX2
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define ABS_SUM_8x8 ABS_SUM_8x8_64
%else
%define ABS_SUM_8x8 ABS_SUM_8x8_32
...
...
libavcodec/x86/fft_mmx.asm
浏览文件 @
3b15a6d7
...
...
@@ -30,7 +30,7 @@
%include "x86inc.asm"
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define pointer resq
%else
%define pointer resd
...
...
@@ -73,7 +73,7 @@ cextern cos_ %+ i
%assign i i<<1
%endrep
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define pointer dq
%else
%define pointer dd
...
...
@@ -299,7 +299,7 @@ IF%1 mova Z(1), m5
INIT_YMM
%if
def
HAVE_AVX
%if HAVE_AVX
align
16
fft8_avx:
mova
m0
,
Z
(
0
)
...
...
@@ -534,7 +534,7 @@ DEFINE_ARGS z, w, n, o1, o3
INIT_YMM
%if
def
HAVE_AVX
%if HAVE_AVX
%macro INTERL_AVX 5
vunpckhps
%
3
,
%
2
,
%
1
vunpcklps
%
2
,
%
2
,
%
1
...
...
@@ -638,7 +638,7 @@ cglobal fft_dispatch%3%2, 2,5,8, z, nbits
RET
%endmacro
; DECL_FFT
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_YMM
DECL_FFT
6
,
_avx
DECL_FFT
6
,
_avx
,
_interleave
...
...
@@ -750,7 +750,7 @@ INIT_XMM
%macro DECL_IMDCT 2
cglobal
imdct_half
%
1
,
3
,
7
,
8
; FFTContext *s, FFTSample *output, const FFTSample *input
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define rrevtab r10
%define rtcos r11
%define rtsin r12
...
...
@@ -769,24 +769,24 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
mov
rtsin
,
[
r0
+
FFTContext.tsin
]
add
rtcos
,
r3
add
rtsin
,
r3
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
push
rtcos
push
rtsin
%endif
shr
r3
,
1
mov
rrevtab
,
[
r0
+
FFTContext.revtab
]
add
rrevtab
,
r3
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
push
rrevtab
%endif
sub
r3
,
4
%if
def
ARCH_X86_64
%if ARCH_X86_64
xor
r4
,
r4
sub
r4
,
r3
%endif
.pre:
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
;unspill
xor
r4
,
r4
sub
r4
,
r3
...
...
@@ -795,7 +795,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
%endif
PREROTATER
r4
,
r3
,
r2
,
rtcos
,
rtsin
%if
def
ARCH_X86_64
%if ARCH_X86_64
movzx
r5
,
word
[
rrevtab
+
r4
-
4
]
movzx
r6
,
word
[
rrevtab
+
r4
-
2
]
movzx
r13
,
word
[
rrevtab
+
r3
]
...
...
@@ -829,7 +829,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
mov
r0d
,
[
r5
+
FFTContext.mdctsize
]
add
r6
,
r0
shr
r0
,
1
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
%define rtcos r2
%define rtsin r3
mov
rtcos
,
[
esp
+
8
]
...
...
@@ -839,7 +839,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
mov
r1
,
-
mmsize
sub
r1
,
r0
%2 r0, r1, r6, rtcos, rtsin
%if
def
ARCH_X86_64
%if ARCH_X86_64
pop
r14
pop
r13
pop
r12
...
...
@@ -856,6 +856,6 @@ DECL_IMDCT _sse, POSROTATESHUF
INIT_YMM
%if
def
HAVE_AVX
%if HAVE_AVX
DECL_IMDCT
_avx
,
POSROTATESHUF_AVX
%endif
libavcodec/x86/fmtconvert.asm
浏览文件 @
3b15a6d7
...
...
@@ -28,14 +28,14 @@ SECTION_TEXT
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%if
def
UNIX64
%if UNIX64
cglobal
int32_to_float_fmul_scalar_
%
1
,
3
,
3
,
%
2
,
ds
t
,
src
,
len
%else
cglobal
int32_to_float_fmul_scalar_
%
1
,
4
,
4
,
%
2
,
ds
t
,
src
,
mul
,
len
%endif
%if
def
WIN64
%if WIN64
SWAP
0
,
2
%elif
def
ARCH_X86_32
%elif ARCH_X86_32
movss
m0
,
mulm
%endif
SPLATD
m0
...
...
@@ -180,7 +180,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro FLOAT_TO_INT16_INTERLEAVE6 1
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal
float_to_int16_interleave6_
%
1
,
2
,
7
,
0
,
ds
t
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define lend r10d
mov
lend
,
r2d
%else
...
...
@@ -241,7 +241,7 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2
%macro FLOAT_INTERLEAVE6 2
cglobal
float_interleave6_
%
1
,
2
,
7
,
%
2
,
ds
t
,
src
,
src1
,
src2
,
src3
,
src4
,
src5
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define lend r10d
mov
lend
,
r2d
%else
...
...
libavcodec/x86/h264_chromamc.asm
浏览文件 @
3b15a6d7
...
...
@@ -94,7 +94,7 @@ SECTION .text
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
; int stride, int h, int mx, int my)
cglobal
%
1
_
%
2
_chroma_mc8_
%
3
,
6
,
7
,
0
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
mov
r6d
,
r5d
...
...
@@ -113,7 +113,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r5
and
r10
,
6
; &~1 for mx/my=[0,7]
lea
r10
,
[
r10
*
4
+
r4
]
...
...
@@ -147,7 +147,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%ifdef PIC
lea
r11
,
[
rnd_rv40_1d_tbl
]
%endif
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r5
,
r0m
%endif
%endif
...
...
@@ -198,7 +198,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%ifdef PIC
lea
r11
,
[
rnd_rv40_2d_tbl
]
%endif
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r5
,
r0m
%endif
%endif
...
...
@@ -279,7 +279,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%macro chroma_mc4_mmx_func 3
cglobal
%
1
_
%
2
_chroma_mc4_
%
3
,
6
,
6
,
0
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
pxor
m7
,
m7
...
...
@@ -364,7 +364,7 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
%macro chroma_mc2_mmx_func 3
cglobal
%
1
_
%
2
_chroma_mc2_
%
3
,
6
,
7
,
0
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
...
...
@@ -452,7 +452,7 @@ chroma_mc4_mmx_func avg, rv40, 3dnow
%macro chroma_mc8_ssse3_func 3
cglobal
%
1
_
%
2
_chroma_mc8_
%
3
,
6
,
7
,
8
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
mov
r6d
,
r5d
...
...
@@ -600,7 +600,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
%macro chroma_mc4_ssse3_func 3
cglobal
%
1
_
%
2
_chroma_mc4_
%
3
,
6
,
7
,
0
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
mov
r6
,
r4
...
...
libavcodec/x86/h264_chromamc_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -252,7 +252,7 @@ cglobal %1_h264_chroma_mc2_10_%2, 6,7
%define CHROMAMC_AVG NOTHING
INIT_XMM
CHROMA_MC8
put
,
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
CHROMA_MC8
put
,
avx
%endif
...
...
@@ -264,7 +264,7 @@ CHROMA_MC2 put, mmxext
%define PAVG pavgw
INIT_XMM
CHROMA_MC8
avg
,
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
CHROMA_MC8
avg
,
avx
%endif
...
...
libavcodec/x86/h264_deblock.asm
浏览文件 @
3b15a6d7
...
...
@@ -200,7 +200,7 @@ cextern pb_A1
; out: %4 = |%1-%2|>%3
; clobbers: %5
%macro DIFF_GT2 5
%if
def
ARCH_X86_64
%if ARCH_X86_64
psubusb
%
5
,
%
2
,
%
1
psubusb
%
4
,
%
1
,
%
2
%else
...
...
@@ -278,7 +278,7 @@ cextern pb_A1
mova
%
4
,
%
2
%endmacro
%if
def
ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
...
...
@@ -333,7 +333,7 @@ cglobal deblock_h_luma_8_%1, 5,7
lea
r11
,
[
r10
+
r10
*
2
]
lea
r6
,
[
r0
-
4
]
lea
r5
,
[
r0
-
4
+
r11
]
%if
def
WIN64
%if WIN64
sub
rsp
,
0x98
%define pix_tmp rsp+0x30
%else
...
...
@@ -352,7 +352,7 @@ cglobal deblock_h_luma_8_%1, 5,7
; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them
lea
r0
,
[
pix_tmp
+
0x30
]
mov
r1d
,
0x10
%if
def
WIN64
%if WIN64
mov
[
rsp
+
0x20
],
r4
%endif
call
deblock_v_luma_8_
%
1
...
...
@@ -376,7 +376,7 @@ cglobal deblock_h_luma_8_%1, 5,7
movq
m3
,
[
pix_tmp
+
0x40
]
TRANSPOSE8x4B_STORE
PASS8ROWS
(
r6
,
r5
,
r10
,
r11
)
%if
def
WIN64
%if WIN64
add
rsp
,
0x98
%else
add
rsp
,
0x68
...
...
@@ -513,7 +513,7 @@ DEBLOCK_LUMA avx, v, 16
%macro LUMA_INTRA_P012 4
; p0..p3 in memory
%if
def
ARCH_X86_64
%if ARCH_X86_64
pavgb
t0
,
p2
,
p1
pavgb
t1
,
p0
,
q0
%else
...
...
@@ -524,7 +524,7 @@ DEBLOCK_LUMA avx, v, 16
%endif
pavgb
t0
,
t1
; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
mova
t5
,
t1
%if
def
ARCH_X86_64
%if ARCH_X86_64
paddb
t2
,
p2
,
p1
paddb
t3
,
p0
,
q0
%else
...
...
@@ -542,7 +542,7 @@ DEBLOCK_LUMA avx, v, 16
pand
t2
,
mpb_1
psubb
t0
,
t2
; p1' = (p2+p1+p0+q0+2)/4;
%if
def
ARCH_X86_64
%if ARCH_X86_64
pavgb
t1
,
p2
,
q1
psubb
t2
,
p2
,
q1
%else
...
...
@@ -617,7 +617,7 @@ DEBLOCK_LUMA avx, v, 16
%define t1 m5
%define t2 m6
%define t3 m7
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define p2 m8
%define q2 m9
%define t4 m10
...
...
@@ -644,7 +644,7 @@ DEBLOCK_LUMA avx, v, 16
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_
%
2
_luma_intra_8_
%
1
,
4
,
6
,
16
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
sub
esp
,
0x60
%endif
lea
r4
,
[
r1
*
4
]
...
...
@@ -659,7 +659,7 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16
mova
p0
,
[
r4
+
r5
]
mova
q0
,
[
r0
]
mova
q1
,
[
r0
+
r1
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
pxor
mpb_0
,
mpb_0
mova
mpb_1
,
[
pb_1
]
LOAD_MASK
r2d
,
r3d
,
t5
; m5=beta-1, t5=alpha-1, m7=mask0
...
...
@@ -695,13 +695,13 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012
[
r0
],
[
r0
+
r1
],
[
r0
+
2
*
r1
],
[
r0
+
r5
]
.end:
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
add
esp
,
0x60
%endif
RET
INIT_MMX
%if
def
ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
...
...
@@ -779,7 +779,7 @@ INIT_XMM
DEBLOCK_LUMA_INTRA
ss
e2
,
v
INIT_AVX
DEBLOCK_LUMA_INTRA
avx
,
v
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_LUMA_INTRA
mmxext
,
v8
%endif
...
...
@@ -824,7 +824,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6
; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_h_chroma_8_mmxext
,
5
,
7
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define buf0 [rsp-24]
%define buf1 [rsp-16]
%else
...
...
libavcodec/x86/h264_deblock_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -302,7 +302,7 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
%endmacro
INIT_XMM
%if
def
ARCH_X86_64
%if ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
; out: m0=p1', m3=q1', m1=p0', m2=q0'
...
...
@@ -435,7 +435,7 @@ DEBLOCK_LUMA_64 avx
; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
%macro LUMA_INTRA_P012 12
; p0..p3 in memory
%if
def
ARCH_X86_64
%if ARCH_X86_64
paddw
t0
,
%
3
,
%
2
mova
t2
,
%
4
paddw
t2
,
%
3
...
...
@@ -501,7 +501,7 @@ DEBLOCK_LUMA_64 avx
LOAD_AB
t0
,
t1
,
r2d
,
r3d
mova
%
1
,
t0
LOAD_MASK
m0
,
m1
,
m2
,
m3
,
%
1
,
t1
,
t0
,
t2
,
t3
%if
def
ARCH_X86_64
%if ARCH_X86_64
mova
%
2
,
t0
; mask0
psrlw
t3
,
%
1
,
2
%else
...
...
@@ -598,7 +598,7 @@ DEBLOCK_LUMA_64 avx
%endif
%endmacro
%if
def
ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
...
...
@@ -792,7 +792,7 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
RET
%endmacro
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_LUMA
mmxext
DEBLOCK_LUMA_INTRA
mmxext
...
...
@@ -907,7 +907,7 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
%endif
%endmacro
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_CHROMA
mmxext
%endif
...
...
libavcodec/x86/h264_idct.asm
浏览文件 @
3b15a6d7
...
...
@@ -198,14 +198,14 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0
; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
%macro IDCT8_ADD_SSE 4
IDCT8_1D_FULL
%
2
%if
def
ARCH_X86_64
%if ARCH_X86_64
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
%else
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
[
%
2
],
[
%
2
+
16
]
%endif
paddw
m0
,
[
pw_32
]
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mova
[
%
2
],
m0
mova
[
%
2
+
16
],
m4
IDCT8_1D
[
%
2
],
[
%
2
+
16
]
...
...
@@ -225,7 +225,7 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0
STORE_DIFF
m1
,
m6
,
m7
,
[
%
1
+%
3
]
STORE_DIFF
m2
,
m6
,
m7
,
[
%
1
+%
3
*
2
]
STORE_DIFF
m3
,
m6
,
m7
,
[
%
1
+%
4
]
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mova
m0
,
[
%
2
]
mova
m1
,
[
%
2
+
16
]
%else
...
...
@@ -371,7 +371,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
test
r6
,
r6
jz
.no_dc
DC_ADD_MMX2_INIT
r2
,
r3
,
r6
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
...
...
@@ -381,7 +381,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
mov
ds
t_regd
,
dword
[
r1
+
r5
*
4
]
lea
ds
t_reg
,
[
r0
+
ds
t_reg
]
DC_ADD_MMX2_OP
movh
,
ds
t_reg
,
r3
,
r6
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r1
,
r1m
%endif
inc
r5
...
...
@@ -448,7 +448,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
test
r6
,
r6
jz
.skipblock
DC_ADD_MMX2_INIT
r2
,
r3
,
r6
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
...
...
@@ -458,7 +458,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
mov
ds
t_regd
,
dword
[
r1
+
r5
*
4
]
add
ds
t_reg
,
r0
DC_ADD_MMX2_OP
movh
,
ds
t_reg
,
r3
,
r6
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r1
,
r1m
%endif
.skipblock
...
...
@@ -489,7 +489,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
test
r6
,
r6
jz
.no_dc
DC_ADD_MMX2_INIT
r2
,
r3
,
r6
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
...
...
@@ -501,7 +501,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
DC_ADD_MMX2_OP
mova
,
ds
t_reg
,
r3
,
r6
lea
ds
t_reg
,
[
ds
t_reg
+
r3
*
4
]
DC_ADD_MMX2_OP
mova
,
ds
t_reg
,
r3
,
r6
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r1
,
r1m
%endif
add
r5
,
4
...
...
@@ -550,7 +550,7 @@ cglobal h264_idct8_add4_8_sse2, 5, 7, 10
jz
.no_dc
INIT_MMX
DC_ADD_MMX2_INIT
r2
,
r3
,
r6
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
...
...
@@ -562,7 +562,7 @@ INIT_MMX
DC_ADD_MMX2_OP
mova
,
ds
t_reg
,
r3
,
r6
lea
ds
t_reg
,
[
ds
t_reg
+
r3
*
4
]
DC_ADD_MMX2_OP
mova
,
ds
t_reg
,
r3
,
r6
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r1
,
r1m
%endif
add
r5
,
4
...
...
@@ -575,7 +575,7 @@ INIT_XMM
mov
ds
t_regd
,
dword
[
r1
+
r5
*
4
]
add
ds
t_reg
,
r0
IDCT8_ADD_SSE
ds
t_reg
,
r2
,
r3
,
r6
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r1
,
r1m
%endif
.skipblock
...
...
@@ -593,7 +593,7 @@ h264_idct_add8_mmx_plane:
or
r6w
,
word
[
r2
]
test
r6
,
r6
jz
.skipblock
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0d
,
dword
[
r1
+
r5
*
4
]
add
r0
,
[
r10
]
%else
...
...
@@ -617,13 +617,13 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
%ifdef PIC
lea
r11
,
[
scan8_mem
]
%endif
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
call
h264_idct_add8_mmx_plane
mov
r5
,
32
add
r2
,
384
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r10
,
gprsize
%else
add
r0mp
,
gprsize
...
...
@@ -637,7 +637,7 @@ h264_idct_add8_mmx2_plane
movzx
r6
,
byte
[
r4
+
r6
]
test
r6
,
r6
jz
.try_dc
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0d
,
dword
[
r1
+
r5
*
4
]
add
r0
,
[
r10
]
%else
...
...
@@ -656,7 +656,7 @@ h264_idct_add8_mmx2_plane
test
r6
,
r6
jz
.skipblock
DC_ADD_MMX2_INIT
r2
,
r3
,
r6
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0d
,
dword
[
r1
+
r5
*
4
]
add
r0
,
[
r10
]
%else
...
...
@@ -677,7 +677,7 @@ h264_idct_add8_mmx2_plane
cglobal
h264_idct_add8_8_mmx2
,
5
,
7
,
0
mov
r5
,
16
add
r2
,
512
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
%ifdef PIC
...
...
@@ -686,7 +686,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
call
h264_idct_add8_mmx2_plane
mov
r5
,
32
add
r2
,
384
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r10
,
gprsize
%else
add
r0mp
,
gprsize
...
...
@@ -738,7 +738,7 @@ x264_add8x4_idct_sse2:
test
r0
,
r0
jz
.cycle
%
1
end
mov
r0d
,
dword
[
r1
+%
1
*
8
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r0
,
r10
%else
add
r0
,
r0m
...
...
@@ -753,7 +753,7 @@ x264_add8x4_idct_sse2:
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_8_sse2
,
5
,
5
,
8
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
; unrolling of the loop leads to an average performance gain of
...
...
@@ -773,7 +773,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
test
r0
,
r0
jz
.try
%
1
dc
mov
r0d
,
dword
[
r1
+%
1
*
8
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r0
,
r10
%else
add
r0
,
r0m
...
...
@@ -785,7 +785,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
or
r0w
,
word
[
r2
+
32
]
jz
.cycle
%
1
end
mov
r0d
,
dword
[
r1
+%
1
*
8
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r0
,
r10
%else
add
r0
,
r0m
...
...
@@ -800,7 +800,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_8_sse2
,
5
,
7
,
8
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
add16intra_sse2_cycle
0
,
0xc
...
...
@@ -817,7 +817,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
movzx
r0
,
word
[
r4
+%
2
]
test
r0
,
r0
jz
.try
%
1
dc
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0d
,
dword
[
r1
+
(
%
1
&
1
)
*
8
+
64
*
(
1
+
(
%
1
>>
1
))]
add
r0
,
[
r10
]
%else
...
...
@@ -831,7 +831,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
movsx
r0
,
word
[
r2
]
or
r0w
,
word
[
r2
+
32
]
jz
.cycle
%
1
end
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0d
,
dword
[
r1
+
(
%
1
&
1
)
*
8
+
64
*
(
1
+
(
%
1
>>
1
))]
add
r0
,
[
r10
]
%else
...
...
@@ -852,12 +852,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_8_sse2
,
5
,
7
,
8
add
r2
,
512
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
add8_sse2_cycle
0
,
0x34
add8_sse2_cycle
1
,
0x3c
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r10
,
gprsize
%else
add
r0mp
,
gprsize
...
...
@@ -977,11 +977,11 @@ cglobal h264_luma_dc_dequant_idct_%1, 3,4,%2
WALSH4_1D
0
,
1
,
2
,
3
,
4
; shift, tmp, output, qmul
%if
def
WIN64
%if WIN64
DECLARE_REG_TMP
0
,
3
,
1
,
2
; we can't avoid this, because r0 is the shift register (ecx) on win64
xchg
r0
,
t2
%elif
def
ARCH_X86_64
%elif ARCH_X86_64
DECLARE_REG_TMP
3
,
1
,
0
,
2
%else
DECLARE_REG_TMP
1
,
3
,
0
,
2
...
...
libavcodec/x86/h264_idct_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -98,7 +98,7 @@ cglobal h264_idct_add_10_%1, 3,3
INIT_XMM
IDCT_ADD_10
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT_ADD_10
avx
%endif
...
...
@@ -128,7 +128,7 @@ add4x4_idct_%1:
INIT_XMM
ALIGN
16
ADD4x4IDCT
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
ALIGN
16
ADD4x4IDCT
avx
...
...
@@ -168,7 +168,7 @@ cglobal h264_idct_add16_10_%1, 5,6
INIT_XMM
IDCT_ADD16_10
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT_ADD16_10
avx
%endif
...
...
@@ -234,7 +234,7 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7
INIT_XMM
IDCT8_DC_ADD
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT8_DC_ADD
avx
%endif
...
...
@@ -305,7 +305,7 @@ cglobal h264_idct_add16intra_10_%1,5,7,8
INIT_XMM
IDCT_ADD16INTRA_10
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT_ADD16INTRA_10
avx
%endif
...
...
@@ -316,7 +316,7 @@ IDCT_ADD16INTRA_10 avx
;-----------------------------------------------------------------------------
%macro IDCT_ADD8 1
cglobal
h264_idct_add8_10_
%
1
,
5
,
7
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r10
,
r0
%endif
add
r2
,
1024
...
...
@@ -324,7 +324,7 @@ cglobal h264_idct_add8_10_%1,5,7
ADD16_OP_INTRA
%
1
,
16
,
4
+
6
*
8
ADD16_OP_INTRA
%
1
,
18
,
4
+
7
*
8
add
r2
,
1024
-
128
*
2
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r0
,
[
r10
+
gprsize
]
%else
mov
r0
,
r0m
...
...
@@ -342,7 +342,7 @@ cglobal h264_idct_add8_10_%1,5,7
INIT_XMM
IDCT_ADD8
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT_ADD8
avx
%endif
...
...
@@ -411,7 +411,7 @@ IDCT_ADD8 avx
; %1=int16_t *block, %2=int16_t *dstblock
%macro IDCT8_ADD_SSE_START 2
IDCT8_1D_FULL
%
1
%if
def
ARCH_X86_64
%if ARCH_X86_64
TRANSPOSE4x4D
0
,
1
,
2
,
3
,
8
mova
[
%
2
],
m0
TRANSPOSE4x4D
4
,
5
,
6
,
7
,
8
...
...
@@ -452,7 +452,7 @@ IDCT_ADD8 avx
%macro IDCT8_ADD 1
cglobal
h264_idct8_add_10_
%
1
,
3
,
4
,
16
%if
ndef UNIX64
%if
UNIX64 == 0
%assign pad 16-gprsize-(stack_offset&15)
sub
rsp
,
pad
call
h264_idct8_add1_10_
%
1
...
...
@@ -467,7 +467,7 @@ h264_idct8_add1_10_%1:
sub
rsp
,
pad
add
dword
[
r1
],
32
%if
def
ARCH_X86_64
%if ARCH_X86_64
IDCT8_ADD_SSE_START
r1
,
rsp
SWAP
1
,
9
SWAP
2
,
10
...
...
@@ -519,7 +519,7 @@ h264_idct8_add1_10_%1:
INIT_XMM
IDCT8_ADD
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT8_ADD
avx
%endif
...
...
@@ -559,7 +559,7 @@ cglobal h264_idct8_add4_10_%1, 0,7,16
INIT_XMM
IDCT8_ADD4
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
IDCT8_ADD4
avx
%endif
libavcodec/x86/h264_intrapred.asm
浏览文件 @
3b15a6d7
...
...
@@ -348,7 +348,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
lea
r3
,
[
r0
+
r2
*
4
-
1
]
add
r4
,
r2
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define e_reg r11
%else
%define e_reg r0
...
...
@@ -369,7 +369,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
lea
r5
,
[
r5
+
r6
*
4
]
movzx
e_reg
,
byte
[
r3
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
movzx
r10
,
byte
[
r4
+
r2
]
sub
r10
,
e_reg
%else
...
...
@@ -385,7 +385,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx
r4
,
byte
[
e_reg
+
r2
]
movzx
r6
,
byte
[
r3
]
sub
r6
,
r4
%if
def
ARCH_X86_64
%if ARCH_X86_64
lea
r6
,
[
r10
+
r6
*
2
]
lea
r5
,
[
r5
+
r6
*
2
]
add
r5
,
r6
...
...
@@ -395,7 +395,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
%endif
movzx
r4
,
byte
[
e_reg
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
movzx
r10
,
byte
[
r3
+
r2
]
sub
r10
,
r4
sub
r5
,
r10
...
...
@@ -409,7 +409,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx
r4
,
byte
[
e_reg
+
r1
]
movzx
r6
,
byte
[
r3
+
r2
*
2
]
sub
r6
,
r4
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r6
,
r10
%endif
lea
r5
,
[
r5
+
r6
*
8
]
...
...
@@ -420,7 +420,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
lea
r5
,
[
r5
+
r6
*
4
]
add
r5
,
r6
; sum of V coefficients
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r0
,
r0m
%endif
...
...
@@ -641,7 +641,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
lea
r3
,
[
r0
-
1
]
add
r4
,
r2
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define e_reg r11
%else
%define e_reg r0
...
...
@@ -652,7 +652,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
sub
r5
,
e_reg
movzx
e_reg
,
byte
[
r3
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
movzx
r10
,
byte
[
r4
+
r2
]
sub
r10
,
e_reg
sub
r5
,
r10
...
...
@@ -666,7 +666,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
movzx
e_reg
,
byte
[
r3
+
r1
]
movzx
r6
,
byte
[
r4
+
r2
*
2
]
sub
r6
,
e_reg
%if
def
ARCH_X86_64
%if ARCH_X86_64
add
r6
,
r10
%endif
lea
r5
,
[
r5
+
r6
*
4
]
...
...
@@ -680,7 +680,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
lea
r5
,
[
r5
+
r6
*
8
]
sar
r5
,
5
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
mov
r0
,
r0m
%endif
...
...
libavcodec/x86/h264_intrapred_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -84,7 +84,7 @@ INIT_XMM
PRED4x4_DR
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED4x4_DR
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED4x4_DR
avx
%endif
...
...
@@ -124,7 +124,7 @@ INIT_XMM
PRED4x4_VR
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED4x4_VR
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED4x4_VR
avx
%endif
...
...
@@ -167,7 +167,7 @@ INIT_XMM
PRED4x4_HD
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED4x4_HD
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED4x4_HD
avx
%endif
...
...
@@ -238,7 +238,7 @@ cglobal pred4x4_down_left_10_%1, 3,3
INIT_XMM
PRED4x4_DL
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED4x4_DL
avx
%endif
...
...
@@ -267,7 +267,7 @@ cglobal pred4x4_vertical_left_10_%1, 3,3
INIT_XMM
PRED4x4_VL
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED4x4_VL
avx
%endif
...
...
@@ -577,7 +577,7 @@ cglobal pred8x8l_top_dc_10_%1, 4,4,6
INIT_XMM
PRED8x8L_TOP_DC
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_TOP_DC
avx
%endif
...
...
@@ -636,7 +636,7 @@ cglobal pred8x8l_dc_10_%1, 4,6,6
INIT_XMM
PRED8x8L_DC
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_DC
avx
%endif
...
...
@@ -671,7 +671,7 @@ cglobal pred8x8l_vertical_10_%1, 4,4,6
INIT_XMM
PRED8x8L_VERTICAL
ss
e2
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_VERTICAL
avx
%endif
...
...
@@ -728,7 +728,7 @@ INIT_XMM
PRED8x8L_HORIZONTAL
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_HORIZONTAL
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_HORIZONTAL
avx
%endif
...
...
@@ -797,7 +797,7 @@ INIT_XMM
PRED8x8L_DOWN_LEFT
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_DOWN_LEFT
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_DOWN_LEFT
avx
%endif
...
...
@@ -872,7 +872,7 @@ INIT_XMM
PRED8x8L_DOWN_RIGHT
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_DOWN_RIGHT
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_DOWN_RIGHT
avx
%endif
...
...
@@ -943,7 +943,7 @@ INIT_XMM
PRED8x8L_VERTICAL_RIGHT
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_VERTICAL_RIGHT
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_VERTICAL_RIGHT
avx
%endif
...
...
@@ -1005,7 +1005,7 @@ INIT_XMM
PRED8x8L_HORIZONTAL_UP
ss
e2
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_HORIZONTAL_UP
ss
se3
%if
def
HAVE_AVX
%if HAVE_AVX
INIT_AVX
PRED8x8L_HORIZONTAL_UP
avx
%endif
...
...
libavcodec/x86/h264_qpel_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -111,7 +111,7 @@ INIT_XMM
%endmacro
%macro MCAxA 8
%if
def
ARCH_X86_64
%if ARCH_X86_64
%ifnidn %1,mmxext
MCAxA_OP
%
1
,
%
2
,
%
3
,
%
4
,
%
5
,
%
6
,
%
7
,
%
8
%endif
...
...
@@ -122,7 +122,7 @@ MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
%macro MCAxA_OP 8
cglobal
%
2
_h264_qpel
%
5
_
%
3
_10_
%
1
,
%
6
,
%
7
,
%
8
%if
def
ARCH_X86_32
%if ARCH_X86_32
call
stub_
%
2
_h264_qpel
%
4
_
%
3
_10_
%
1
mov
r0
,
r0m
mov
r1
,
r1m
...
...
@@ -152,7 +152,7 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
call
stub_
%
2
_h264_qpel
%
4
_
%
3
_10_
%
1
lea
r0
,
[
r10
+
r2
*%
4
+%
4
*
2
]
lea
r1
,
[
r11
+
r2
*%
4
+%
4
*
2
]
%if
ndef UNIX64
; fall through to function
%if
UNIX64 == 0
; fall through to function
call
stub_
%
2
_h264_qpel
%
4
_
%
3
_10_
%
1
RET
%endif
...
...
@@ -165,7 +165,7 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
MCAxA
%
1
,
%
2
,
%
3
,
%
4
,
i
,
%
5
,
%
6
,
%
7
cglobal
%
2
_h264_qpel
%
4
_
%
3
_10_
%
1
,
%
5
,
%
6
,
%
7
%if
ndef UNIX64
; no prologue or epilogue for UNIX64
%if
UNIX64 == 0
; no prologue or epilogue for UNIX64
call
stub_
%
2
_h264_qpel
%
4
_
%
3
_10_
%
1
RET
%endif
...
...
libavcodec/x86/h264_weight.asm
浏览文件 @
3b15a6d7
...
...
@@ -126,7 +126,7 @@ INIT_XMM
WEIGHT_FUNC_HALF_MM
8
,
8
,
ss
e2
%macro BIWEIGHT_SETUP 0
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
...
...
@@ -244,7 +244,7 @@ INIT_XMM
BIWEIGHT_FUNC_HALF_MM
8
,
8
,
ss
e2
%macro BIWEIGHT_SSSE3_SETUP 0
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
...
...
libavcodec/x86/h264_weight_10bit.asm
浏览文件 @
3b15a6d7
...
...
@@ -152,7 +152,7 @@ WEIGHT_FUNC_HALF_MM sse4
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int log2_denom, int weightd, int weights, int offset);
;-----------------------------------------------------------------------------
%if
def
ARCH_X86_32
%if ARCH_X86_32
DECLARE_REG_TMP
3
%else
DECLARE_REG_TMP
10
...
...
libavcodec/x86/imdct36_sse.asm
浏览文件 @
3b15a6d7
...
...
@@ -219,13 +219,13 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
subps
m5
,
m0
,
m3
%if
def
ARCH_X86_64
%if ARCH_X86_64
SWAP
m5
,
m8
%endif
mulps
m7
,
m2
,
[
ps_val1
]
%if
def
ARCH_X86_64
%if ARCH_X86_64
mulps
m5
,
m8
,
[
ps_val2
]
%else
mulps
m5
,
m5
,
[
ps_val2
]
...
...
@@ -235,7 +235,7 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
mulps
m5
,
m6
,
[
ps_val1
]
subps
m7
,
m7
,
m5
%if
def
ARCH_X86_64
%if ARCH_X86_64
SWAP
m5
,
m8
%else
subps
m5
,
m0
,
m3
...
...
@@ -376,7 +376,7 @@ DEFINE_IMDCT
INIT_XMM
ss
e
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define SPILL SWAP
%define UNSPILL SWAP
%define SPILLED(x) m %+ x
...
...
libavcodec/x86/proresdsp.asm
浏览文件 @
3b15a6d7
...
...
@@ -32,7 +32,7 @@
%define W6sh2 8867
; W6 = 35468 = 8867<<2
%define W7sh2 4520
; W7 = 18081 = 4520<<2 + 1
%if
def
ARCH_X86_64
%if ARCH_X86_64
SECTION
_RODATA
...
...
libavcodec/x86/vp3dsp.asm
浏览文件 @
3b15a6d7
...
...
@@ -106,7 +106,7 @@ SECTION .text
INIT_MMX
cglobal
vp3_v_loop_filter_mmx2
,
3
,
4
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r1
,
r1d
%endif
mov
r3
,
r1
...
...
@@ -123,7 +123,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4
RET
cglobal
vp3_h_loop_filter_mmx2
,
3
,
4
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r1
,
r1d
%endif
lea
r3
,
[
r1
*
3
]
...
...
@@ -510,7 +510,7 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
%define SHIFT(x)
%define ADD(x)
VP3_1D_IDCT_SSE2
%if
def
ARCH_X86_64
%if ARCH_X86_64
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
%else
TRANSPOSE8x8W
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
[
%
1
],
[
%
1
+
16
]
...
...
@@ -530,7 +530,7 @@ cglobal vp3_idct_%1, 1, 1, %2
cglobal
vp3_idct_put_
%
1
,
3
,
%
3
,
%
2
VP3_IDCT_
%
1
r2
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r3
,
r2
mov
r2
,
r1
mov
r1
,
r0
...
...
@@ -540,7 +540,7 @@ cglobal vp3_idct_put_%1, 3, %3, %2
mov
r1m
,
r0
mov
r2m
,
r1
%endif
%if
def
WIN64
%if WIN64
call
put_signed_pixels_clamped_mmx
RET
%else
...
...
@@ -549,7 +549,7 @@ cglobal vp3_idct_put_%1, 3, %3, %2
cglobal
vp3_idct_add_
%
1
,
3
,
%
3
,
%
2
VP3_IDCT_
%
1
r2
%if
def
ARCH_X86_64
%if ARCH_X86_64
mov
r3
,
r2
mov
r2
,
r1
mov
r1
,
r0
...
...
@@ -559,7 +559,7 @@ cglobal vp3_idct_add_%1, 3, %3, %2
mov
r1m
,
r0
mov
r2m
,
r1
%endif
%if
def
WIN64
%if WIN64
call
add_pixels_clamped_mmx
RET
%else
...
...
@@ -567,7 +567,7 @@ cglobal vp3_idct_add_%1, 3, %3, %2
%endif
%endmacro
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define REGS 4
%else
%define REGS 3
...
...
@@ -599,7 +599,7 @@ vp3_idct_funcs sse2, 9, REGS
INIT_MMX
cglobal
vp3_idct_dc_add_mmx2
,
3
,
4
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r1
,
r1d
%endif
lea
r3
,
[
r1
*
3
]
...
...
libavcodec/x86/vp56dsp.asm
浏览文件 @
3b15a6d7
...
...
@@ -127,7 +127,7 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2
sub
rsp
,
8
*
15
movq
m6
,
[
pw_64
]
%endif
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
...
...
libavutil/x86/x86inc.asm
浏览文件 @
3b15a6d7
...
...
@@ -35,11 +35,13 @@
%define program_name ff
%ifdef ARCH_X86_64
%define UNIX64 0
%define WIN64 0
%if ARCH_X86_64
%ifidn __OUTPUT_FORMAT__,win32
%define WIN64
%define WIN64
1
%else
%define UNIX64
%define UNIX64
1
%endif
%endif
...
...
@@ -79,9 +81,9 @@
%endif
%endmacro
%if
def
WIN64
%if WIN64
%define PIC
%elif
ndef
ARCH_X86_64
%elif
!
ARCH_X86_64
; x86_32 doesn't require PIC.
; Some distros prefer shared objects to be PIC, but nothing breaks if
; the code contains a few textrels, so we'll skip that complexity.
...
...
@@ -132,7 +134,7 @@
%define r%1m %6
%ifid %6
; i.e. it's a register
%define r%1mp %2
%elif
def
ARCH_X86_64
; memory
%elif ARCH_X86_64
; memory
%define r%1mp qword %6
%else
%define r%1mp dword %6
...
...
@@ -149,7 +151,7 @@
%define e%1w %1
%define r%1b %2
%define e%1b %2
%if
ndef ARCH_X86_64
%if
ARCH_X86_64 == 0
%define r%1 e%1
%endif
%endmacro
...
...
@@ -185,7 +187,7 @@ DECLARE_REG_SIZE bp, bpl
DECLARE_REG_TMP_SIZE
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define gprsize 8
%else
%define gprsize 4
...
...
@@ -261,7 +263,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
%assign n_arg_names %%i
%endmacro
%if
def
WIN64
; Windows x64 ;=================================================
%if WIN64
; Windows x64 ;=================================================
DECLARE_REG
0
,
rcx
,
ecx
,
cx
,
cl
,
ecx
DECLARE_REG
1
,
rdx
,
edx
,
dx
,
dl
,
edx
...
...
@@ -346,7 +348,7 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
%endif
%endmacro
%elif
def
ARCH_X86_64
; *nix x64 ;=============================================
%elif ARCH_X86_64
; *nix x64 ;=============================================
DECLARE_REG
0
,
rdi
,
edi
,
di
,
di
l
,
edi
DECLARE_REG
1
,
rsi
,
esi
,
si
,
si
l
,
esi
...
...
@@ -447,7 +449,7 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
%endif
;======================================================================
%if
ndef WIN64
%if
WIN64 == 0
%macro WIN64_SPILL_XMM 1
%endmacro
%macro WIN64_RESTORE_XMM 1
...
...
@@ -617,7 +619,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
%define num_mmregs 8
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define num_mmregs 16
%endif
%define mova movdqa
...
...
@@ -646,7 +648,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
%define num_mmregs 8
%if
def
ARCH_X86_64
%if ARCH_X86_64
%define num_mmregs 16
%endif
%define mova vmovaps
...
...
libavutil/x86/x86util.asm
浏览文件 @
3b15a6d7
...
...
@@ -95,7 +95,7 @@
%endmacro
%macro TRANSPOSE8x8W 9-11
%if
def
ARCH_X86_64
%if ARCH_X86_64
SBUTTERFLY
wd
,
%
1
,
%
2
,
%
9
SBUTTERFLY
wd
,
%
3
,
%
4
,
%
9
SBUTTERFLY
wd
,
%
5
,
%
6
,
%
9
...
...
libswscale/x86/input.asm
浏览文件 @
3b15a6d7
...
...
@@ -64,7 +64,7 @@ SECTION .text
; split the loop in an aligned and unaligned case
%macro YUYV_TO_Y_FN 2-3
cglobal
%
2
ToY
,
3
,
3
,
%
1
,
ds
t
,
src
,
w
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
wq
,
wd
%endif
add
ds
tq
,
wq
...
...
@@ -134,7 +134,7 @@ cglobal %2ToY, 3, 3, %1, dst, src, w
; split the loop in an aligned and unaligned case
%macro YUYV_TO_UV_FN 2-3
cglobal
%
2
ToUV
,
3
,
4
,
%
1
,
ds
tU
,
ds
tV
,
src
,
w
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
wq
,
dword
r4m
%else
; x86-32
mov
wq
,
r4m
...
...
@@ -189,7 +189,7 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
; %2 = nv12 or nv21
%macro NVXX_TO_UV_FN 2
cglobal
%
2
ToUV
,
3
,
4
,
%
1
,
ds
tU
,
ds
tV
,
src
,
w
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
wq
,
dword
r4m
%else
; x86-32
mov
wq
,
r4m
...
...
@@ -215,7 +215,7 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
%endif
; mmsize == 8/16
%endmacro
%if
def
ARCH_X86_32
%if ARCH_X86_32
INIT_MMX
mmx
YUYV_TO_Y_FN
0
,
yuyv
YUYV_TO_Y_FN
0
,
uyvy
...
...
libswscale/x86/output.asm
浏览文件 @
3b15a6d7
...
...
@@ -58,7 +58,7 @@ SECTION .text
%macro yuv2planeX_fn 3
%if
def
ARCH_X86_32
%if ARCH_X86_32
%define cntr_reg r1
%define movsx mov
%else
...
...
@@ -72,7 +72,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
%endif
; %1 == 8/9/10
%if %1 == 8
%if
def
ARCH_X86_32
%if ARCH_X86_32
%assign pad 0x2c - (stack_offset & 15)
SUB
rsp
,
pad
%define m_dith m7
...
...
@@ -91,7 +91,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
.no_rot:
%if mmsize == 16
punpcklbw
m_dith
,
m6
%if
def
ARCH_X86_64
%if ARCH_X86_64
punpcklwd
m8
,
m_dith
,
m6
pslld
m8
,
12
%else
; x86-32
...
...
@@ -100,7 +100,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
%endif
; x86-32/64
punpckhwd
m_dith
,
m6
pslld
m_dith
,
12
%if
def
ARCH_X86_32
%if ARCH_X86_32
mova
[
rsp
+
0
],
m5
mova
[
rsp
+
16
],
m_dith
%endif
...
...
@@ -135,7 +135,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
%endif
; %1 == 8
%if %1 == 8
%if
def
ARCH_X86_32
%if ARCH_X86_32
mova
m2
,
[
rsp
+
mmsize
*
(
0
+%%
i
)]
mova
m1
,
[
rsp
+
mmsize
*
(
1
+%%
i
)]
%else
; x86-64
...
...
@@ -233,7 +233,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
jg
.pixelloop
%if %1 == 8
%if
def
ARCH_X86_32
%if ARCH_X86_32
ADD
rsp
,
pad
RET
%else
; x86-64
...
...
@@ -245,7 +245,7 @@ cglobal yuv2planeX_%1, %3, 7, %2
%endmacro
%define PALIGNR PALIGNR_MMX
%if
def
ARCH_X86_32
%if ARCH_X86_32
INIT_MMX
mmx2
yuv2planeX_fn
8
,
0
,
7
yuv2planeX_fn
9
,
0
,
5
...
...
@@ -382,7 +382,7 @@ cglobal yuv2plane1_%1, %3, %3, %2
REP_RET
%endmacro
%if
def
ARCH_X86_32
%if ARCH_X86_32
INIT_MMX
mmx
yuv2plane1_fn
8
,
0
,
5
yuv2plane1_fn
16
,
0
,
3
...
...
libswscale/x86/scale.asm
浏览文件 @
3b15a6d7
...
...
@@ -51,7 +51,7 @@ SECTION .text
; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, opt, n_args, n_xmm
%macro SCALE_FUNC 7
cglobal
hscale
%
1
to
%
2
_
%
4
_
%
5
,
%
6
,
7
,
%
7
%if
def
ARCH_X86_64
%if ARCH_X86_64
movsxd
r2
,
r2d
%endif
; x86-64
%if %2 == 19
...
...
@@ -237,7 +237,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%else
; %4 == X || %4 == X8
%define r6sub 0
%endif
; %4 ==/!= X4
%if
def
ARCH_X86_64
%if ARCH_X86_64
push
r12
movsxd
r6
,
r6d
; filterSize
lea
r12
,
[
r3
+
(
r6
-
r6sub
)
*
srcmul
]
; &src[filterSize&~4]
...
...
@@ -384,7 +384,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%ifnidn %3, X
REP_RET
%else
; %3 == X
%if
def
ARCH_X86_64
%if ARCH_X86_64
pop
r12
RET
%else
; x86-32
...
...
@@ -419,7 +419,7 @@ SCALE_FUNCS 10, 19, %1, %3
SCALE_FUNCS
16
,
19
,
%
1
,
%
4
%endmacro
%if
def
ARCH_X86_32
%if ARCH_X86_32
INIT_MMX
SCALE_FUNCS2
mmx
,
0
,
0
,
0
%endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录