Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
decd5193
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
decd5193
编写于
3月 10, 2015
作者:
C
Christophe Gisquet
提交者:
Michael Niedermayer
3月 14, 2015
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
x86: xvid_idct: merged idct_put SSE2 versions
Signed-off-by:
N
Michael Niedermayer
<
michaelni@gmx.at
>
上级
8200575d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
140 additions
and
70 deletions
+140
-70
libavcodec/x86/xvididct.asm
libavcodec/x86/xvididct.asm
+138
-64
libavcodec/x86/xvididct_init.c
libavcodec/x86/xvididct_init.c
+2
-6
未找到文件。
libavcodec/x86/xvididct.asm
浏览文件 @
decd5193
...
@@ -292,13 +292,13 @@ SECTION .text
...
@@ -292,13 +292,13 @@ SECTION .text
%define TAN3 xmm13
%define TAN3 xmm13
%define TAN1 xmm14
%define TAN1 xmm14
%else
%else
%define ROW0 [
r0
+ 0*16]
%define ROW0 [
BLOCK
+ 0*16]
%define REG0 xmm4
%define REG0 xmm4
%define ROW2 [
r0
+ 2*16]
%define ROW2 [
BLOCK
+ 2*16]
%define REG2 xmm4
%define REG2 xmm4
%define ROW4 [
r0
+ 4*16]
%define ROW4 [
BLOCK
+ 4*16]
%define REG4 xmm6
%define REG4 xmm6
%define ROW6 [
r0
+ 6*16]
%define ROW6 [
BLOCK
+ 6*16]
%define REG6 xmm6
%define REG6 xmm6
%define XMMS xmm2
%define XMMS xmm2
%define SREG2 xmm7
%define SREG2 xmm7
...
@@ -369,8 +369,71 @@ SECTION .text
...
@@ -369,8 +369,71 @@ SECTION .text
movdqa
TAN1
,
[
tan1
]
movdqa
TAN1
,
[
tan1
]
%endmacro
%endmacro
%macro FIRST_HALF 2
; %1=dct %2=type(0=normal, 1=put, 2=add)
psraw
xmm5
,
6
psraw
REG0
,
6
psraw
TAN3
,
6
psraw
xmm3
,
6
; dct coeffs must still be written for AC prediction
%if %2 == 0
movdqa
[
%
1
+
1
*
16
],
TAN3
movdqa
[
%
1
+
2
*
16
],
xmm3
movdqa
[
%
1
+
5
*
16
],
REG0
movdqa
[
%
1
+
6
*
16
],
xmm5
%else
; Must now load args as gprs are no longer used for masks
; DEST is set to where address of dest was loaded
%if ARCH_X86_32
%xdefine DEST r2q
; BLOCK is r0, stride r1
movifnidn
DEST
,
destm
movifnidn
strideq
,
stridem
%else
%xdefine DEST r0q
%endif
lea
r3q
,
[
3
*
strideq
]
%if %2 == 1
packuswb
TAN3
,
xmm3
packuswb
xmm5
,
REG0
movq
[
DEST
+
strideq
],
TAN3
movhps
[
DEST
+
2
*
strideq
],
TAN3
; REG0 and TAN3 are now available (and likely used in second half)
%else
%warning Unimplemented
%endif
%endif
%endmacro
%macro SECOND_HALF 6
; %1=dct %2=type(0=normal, 1=put, 2=add) %3-%6=xmm registers
psraw
%
3
,
6
psraw
%
4
,
6
psraw
%
5
,
6
psraw
%
6
,
6
; dct coeffs must still be written for AC prediction
%if %2 == 0
movdqa
[
%
1
+
0
*
16
],
%
3
movdqa
[
%
1
+
3
*
16
],
%
5
movdqa
[
%
1
+
4
*
16
],
%
6
movdqa
[
%
1
+
7
*
16
],
%
4
%elif %2 == 1
packuswb
%
3
,
%
5
packuswb
%
6
,
%
4
; address of dest may have been loaded
movq
[
DEST
],
%
3
movhps
[
DEST
+
r3q
],
%
3
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
[
DEST
],
%
6
movhps
[
DEST
+
r3q
],
%
6
; and now write remainder of first half
movq
[
DEST
+
2
*
strideq
],
xmm5
movhps
[
DEST
+
strideq
],
xmm5
%elif %2 == 2
%warning Unimplemented
%endif
%endmacro
; IDCT pass on columns.
; IDCT pass on columns.
%macro iLLM_PASS
1
;dct
%macro iLLM_PASS
2
; %1=dct %2=type(0=normal, 1=put, 2=add)
movdqa
xmm1
,
TAN3
movdqa
xmm1
,
TAN3
movdqa
xmm3
,
TAN1
movdqa
xmm3
,
TAN1
pmulhw
TAN3
,
xmm4
pmulhw
TAN3
,
xmm4
...
@@ -407,7 +470,7 @@ SECTION .text
...
@@ -407,7 +470,7 @@ SECTION .text
psubsw
xmm5
,
REG6
psubsw
xmm5
,
REG6
MOV32
ROW0
,
REG0
MOV32
ROW0
,
REG0
MOV32
ROW4
,
REG4
MOV32
ROW4
,
REG4
MOV32
TAN1
,
[
r0
]
MOV32
TAN1
,
[
BL
OCK
]
movdqa
XMMS
,
REG0
movdqa
XMMS
,
REG0
psubsw
REG0
,
REG4
psubsw
REG0
,
REG4
paddsw
REG4
,
XMMS
paddsw
REG4
,
XMMS
...
@@ -423,33 +486,22 @@ SECTION .text
...
@@ -423,33 +486,22 @@ SECTION .text
movdqa
XMMS
,
REG0
movdqa
XMMS
,
REG0
psubsw
REG0
,
xmm3
psubsw
REG0
,
xmm3
paddsw
xmm3
,
XMMS
paddsw
xmm3
,
XMMS
MOV32
[
r0
],
TAN1
MOV32
[
BL
OCK
],
TAN1
psraw
xmm5
,
6
psraw
REG0
,
6
FIRST_HALF
%
1
,
%
2
psraw
TAN3
,
6
psraw
xmm3
,
6
movdqa
[
%
1
+
1
*
16
],
TAN3
movdqa
[
%
1
+
2
*
16
],
xmm3
movdqa
[
%
1
+
5
*
16
],
REG0
movdqa
[
%
1
+
6
*
16
],
xmm5
movdqa
xmm0
,
xmm7
movdqa
xmm0
,
xmm7
movdqa
xmm4
,
REG4
movdqa
xmm4
,
REG4
psubsw
xmm7
,
xmm1
psubsw
xmm7
,
xmm1
psubsw
REG4
,
TAN1
psubsw
REG4
,
TAN1
paddsw
xmm1
,
xmm0
paddsw
xmm1
,
xmm0
paddsw
TAN1
,
xmm4
paddsw
TAN1
,
xmm4
psraw
xmm1
,
6
psraw
xmm7
,
6
SECOND_HALF
%
1
,
%
2
,
xmm1
,
xmm7
,
TAN1
,
REG4
psraw
TAN1
,
6
psraw
REG4
,
6
movdqa
[
%
1
+
0
*
16
],
xmm1
movdqa
[
%
1
+
3
*
16
],
TAN1
movdqa
[
%
1
+
4
*
16
],
REG4
movdqa
[
%
1
+
7
*
16
],
xmm7
%endmacro
%endmacro
; IDCT pass on columns, assuming rows 4-7 are zero
; IDCT pass on columns, assuming rows 4-7 are zero
%macro iLLM_PASS_SPARSE
1
;dct
%macro iLLM_PASS_SPARSE
2
; %1=dct %2=type(normal,put,add)
pmulhw
TAN3
,
xmm4
pmulhw
TAN3
,
xmm4
paddsw
TAN3
,
xmm4
paddsw
TAN3
,
xmm4
movdqa
xmm3
,
xmm6
movdqa
xmm3
,
xmm6
...
@@ -475,7 +527,7 @@ SECTION .text
...
@@ -475,7 +527,7 @@ SECTION .text
movdqa
xmm6
,
REG0
movdqa
xmm6
,
REG0
psubsw
xmm6
,
SREG2
psubsw
xmm6
,
SREG2
paddsw
SREG2
,
REG0
paddsw
SREG2
,
REG0
MOV32
TAN1
,
[
r0
]
MOV32
TAN1
,
[
BL
OCK
]
movdqa
XMMS
,
REG0
movdqa
XMMS
,
REG0
psubsw
REG0
,
xmm5
psubsw
REG0
,
xmm5
paddsw
xmm5
,
XMMS
paddsw
xmm5
,
XMMS
...
@@ -485,70 +537,92 @@ SECTION .text
...
@@ -485,70 +537,92 @@ SECTION .text
movdqa
XMMS
,
REG0
movdqa
XMMS
,
REG0
psubsw
REG0
,
xmm3
psubsw
REG0
,
xmm3
paddsw
xmm3
,
XMMS
paddsw
xmm3
,
XMMS
MOV32
[
r0
],
TAN1
MOV32
[
BL
OCK
],
TAN1
psraw
xmm5
,
6
psraw
REG0
,
6
FIRST_HALF
%
1
,
%
2
psraw
TAN3
,
6
psraw
xmm3
,
6
movdqa
[
%
1
+
1
*
16
],
TAN3
movdqa
[
%
1
+
2
*
16
],
xmm3
movdqa
[
%
1
+
5
*
16
],
REG0
movdqa
[
%
1
+
6
*
16
],
xmm5
movdqa
xmm0
,
SREG2
movdqa
xmm0
,
SREG2
movdqa
xmm4
,
xmm6
movdqa
xmm4
,
xmm6
psubsw
SREG2
,
xmm1
psubsw
SREG2
,
xmm1
psubsw
xmm6
,
TAN1
psubsw
xmm6
,
TAN1
paddsw
xmm1
,
xmm0
paddsw
xmm1
,
xmm0
paddsw
TAN1
,
xmm4
paddsw
TAN1
,
xmm4
psraw
xmm1
,
6
psraw
SREG2
,
6
SECOND_HALF
%
1
,
%
2
,
xmm1
,
SREG2
,
TAN1
,
xmm6
psraw
TAN1
,
6
psraw
xmm6
,
6
movdqa
[
%
1
+
0
*
16
],
xmm1
movdqa
[
%
1
+
3
*
16
],
TAN1
movdqa
[
%
1
+
4
*
16
],
xmm6
movdqa
[
%
1
+
7
*
16
],
SREG2
%endmacro
%endmacro
INIT_XMM
ss
e2
%macro IDCT_SSE2 1
; 0=normal 1=put 2=add
cglobal
xvid_idct
,
1
,
5
,
8
+
7
*
ARCH_X86_64
,
bl
ock
%if %1 == 0 || ARCH_X86_32
%define GPR0 r1d
%define GPR1 r2d
%define GPR2 r3d
%define GPR3 r4d
%define NUM_GPRS 5
%else
%define GPR0 r3d
%define GPR1 r4d
%define GPR2 r5d
%define GPR3 r6d
%define NUM_GPRS 7
%endif
%if %1 == 0
cglobal
xvid_idct
,
1
,
NUM_GPRS
,
8
+
7
*
ARCH_X86_64
,
bl
ock
%xdefine BLOCK blockq
%else
%if %1 == 1
cglobal
xvid_idct_put
,
0
,
NUM_GPRS
,
8
+
7
*
ARCH_X86_64
,
dest
,
stride
,
bl
ock
%else
cglobal
xvid_idct_add
,
0
,
NUM_GPRS
,
8
+
7
*
ARCH_X86_64
,
dest
,
stride
,
bl
ock
%endif
%if ARCH_X86_64
%xdefine BLOCK blockq
%else
mov
r0q
,
bl
ockm
%xdefine BLOCK r0q
%endif
%endif
movq
mm0
,
[
pb_127
]
movq
mm0
,
[
pb_127
]
iMTX_MULT
r0
+
0
*
16
,
iTab1
,
PUT_EVEN
,
ROW0
,
0
*
16
iMTX_MULT
BL
OCK
+
0
*
16
,
iTab1
,
PUT_EVEN
,
ROW0
,
0
*
16
iMTX_MULT
r0
+
1
*
16
,
iTab2
,
PUT_ODD
,
ROW1
,
1
*
16
iMTX_MULT
BL
OCK
+
1
*
16
,
iTab2
,
PUT_ODD
,
ROW1
,
1
*
16
iMTX_MULT
r0
+
2
*
16
,
iTab3
,
PUT_EVEN
,
ROW2
,
2
*
16
iMTX_MULT
BL
OCK
+
2
*
16
,
iTab3
,
PUT_EVEN
,
ROW2
,
2
*
16
TEST_TWO_ROWS
r0
+
3
*
16
,
r0
+
4
*
16
,
r1d
,
r2d
,
CL
EAR_ODD
,
ROW3
,
CL
EAR_EVEN
,
ROW4
; a, c
TEST_TWO_ROWS
BL
OCK
+
3
*
16
,
BL
OCK
+
4
*
16
,
GPR0
,
GPR1
,
CL
EAR_ODD
,
ROW3
,
CL
EAR_EVEN
,
ROW4
; a, c
JZ
r1d
,
col1
JZ
GPR0
,
col1
iMTX_MULT
r0
+
3
*
16
,
iTab4
,
PUT_ODD
,
ROW3
,
3
*
16
iMTX_MULT
BL
OCK
+
3
*
16
,
iTab4
,
PUT_ODD
,
ROW3
,
3
*
16
.col1:
.col1:
TEST_TWO_ROWS
r0
+
5
*
16
,
r0
+
6
*
16
,
r1d
,
r3d
,
CL
EAR_ODD
,
ROW5
,
CL
EAR_EVEN
,
ROW6
; a, d
TEST_TWO_ROWS
BL
OCK
+
5
*
16
,
BL
OCK
+
6
*
16
,
GPR0
,
GPR2
,
CL
EAR_ODD
,
ROW5
,
CL
EAR_EVEN
,
ROW6
; a, d
TEST_ONE_ROW
r0
+
7
*
16
,
r4d
,
CL
EAR_ODD
,
ROW7
; esi
TEST_ONE_ROW
BL
OCK
+
7
*
16
,
GPR3
,
CL
EAR_ODD
,
ROW7
; esi
iLLM_HEAD
iLLM_HEAD
JNZ
r2d
,
2
JNZ
GPR1
,
2
JNZ
r1d
,
3
JNZ
GPR0
,
3
JNZ
r3d
,
4
JNZ
GPR2
,
4
JNZ
r4d
,
5
JNZ
GPR3
,
5
iLLM_PASS_SPARSE
r0
iLLM_PASS_SPARSE
BL
OCK
,
%
1
jmp
.6
jmp
.6
.2:
.2:
iMTX_MULT
r0
+
4
*
16
,
iTab1
,
PUT_EVEN
,
ROW4
iMTX_MULT
BL
OCK
+
4
*
16
,
iTab1
,
PUT_EVEN
,
ROW4
.3:
.3:
iMTX_MULT
r0
+
5
*
16
,
iTab4
,
PUT_ODD
,
ROW5
,
4
*
16
iMTX_MULT
BL
OCK
+
5
*
16
,
iTab4
,
PUT_ODD
,
ROW5
,
4
*
16
JZ
r3d
,
col2
JZ
GPR2
,
col2
.4:
.4:
iMTX_MULT
r0
+
6
*
16
,
iTab3
,
PUT_EVEN
,
ROW6
,
5
*
16
iMTX_MULT
BL
OCK
+
6
*
16
,
iTab3
,
PUT_EVEN
,
ROW6
,
5
*
16
.col2:
.col2:
JZ
r4d
,
col3
JZ
GPR3
,
col3
.5:
.5:
iMTX_MULT
r0
+
7
*
16
,
iTab2
,
PUT_ODD
,
ROW7
,
5
*
16
iMTX_MULT
BL
OCK
+
7
*
16
,
iTab2
,
PUT_ODD
,
ROW7
,
5
*
16
.col3:
.col3:
%if ARCH_X86_32
%if ARCH_X86_32
iLLM_HEAD
iLLM_HEAD
%endif
%endif
iLLM_PASS
r0
iLLM_PASS
BL
OCK
,
%
1
.6:
.6:
RET
RET
%endmacro
INIT_XMM
ss
e2
IDCT_SSE2
0
IDCT_SSE2
1
%if ARCH_X86_32
%if ARCH_X86_32
...
...
libavcodec/x86/xvididct_init.c
浏览文件 @
decd5193
...
@@ -26,11 +26,7 @@
...
@@ -26,11 +26,7 @@
#include "idctdsp.h"
#include "idctdsp.h"
#include "xvididct.h"
#include "xvididct.h"
static
void
xvid_idct_sse2_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
{
ff_xvid_idct_sse2
(
block
);
ff_put_pixels_clamped
(
block
,
dest
,
line_size
);
}
static
void
xvid_idct_sse2_add
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
static
void
xvid_idct_sse2_add
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
{
{
...
@@ -91,7 +87,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
...
@@ -91,7 +87,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
#endif
#endif
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
idct_put
=
xvid_idct_sse2_put
;
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_add
=
xvid_idct_sse2_add
;
c
->
idct_add
=
xvid_idct_sse2_add
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录