Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
8fd19ab2
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8fd19ab2
编写于
1月 29, 2004
作者:
M
Michael Niedermayer
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
SSE2 fdct by (Balatoni Denes <pnis at coder dot hu>)
Originally committed as revision 2729 to
svn://svn.ffmpeg.org/ffmpeg/trunk
上级
5a603607
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
228 additions
and
9 deletions
+228
-9
libavcodec/dsputil.h
libavcodec/dsputil.h
+1
-0
libavcodec/i386/dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+3
-1
libavcodec/i386/fdct_mmx.c
libavcodec/i386/fdct_mmx.c
+215
-7
libavcodec/i386/mpegvideo_mmx.c
libavcodec/i386/mpegvideo_mmx.c
+9
-1
未找到文件。
libavcodec/dsputil.h
浏览文件 @
8fd19ab2
...
...
@@ -45,6 +45,7 @@ void j_rev_dct (DCTELEM *data);
/*
 * Forward DCT entry points; the suffix names the x86 SIMD variant.
 * Each takes a pointer to the coefficient block.
 * NOTE(review): the MMX and MMX2 bodies are not visible here; presumably
 * they also transform the block in place - confirm.
 */
void
ff_fdct_mmx
(
DCTELEM
*
block
);
void
ff_fdct_mmx2
(
DCTELEM
*
block
);
/* SSE2 variant: its definition in i386/fdct_mmx.c writes the result back into block. */
void
ff_fdct_sse2
(
DCTELEM
*
block
);
/* encoding scans */
extern
const
uint8_t
ff_alternate_horizontal_scan
[
64
];
...
...
libavcodec/i386/dsputil_mmx.c
浏览文件 @
8fd19ab2
...
...
@@ -2032,7 +2032,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#ifdef CONFIG_ENCODERS
if
(
dct_algo
==
FF_DCT_AUTO
||
dct_algo
==
FF_DCT_MMX
){
if
(
mm_flags
&
MM_MMXEXT
){
if
(
mm_flags
&
MM_SSE2
){
c
->
fdct
=
ff_fdct_sse2
;
}
else
if
(
mm_flags
&
MM_MMXEXT
){
c
->
fdct
=
ff_fdct_mmx2
;
}
else
{
c
->
fdct
=
ff_fdct_mmx
;
...
...
libavcodec/i386/fdct_mmx.c
浏览文件 @
8fd19ab2
...
...
@@ -2,11 +2,16 @@
* MMX optimized forward DCT
* The gcc porting is Copyright (c) 2001 Fabrice Bellard.
* cleanup/optimizations are Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
* SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
*
* from fdctam32.c - AP922 MMX(3D-Now) forward-DCT
*
* Intel Application Note AP-922 - fast, precise implementation of DCT
* http://developer.intel.com/vtune/cbts/appnotes.htm
*
* Also of inspiration:
* a page about fdct at http://www.geocities.com/ssavekar/dct.htm
* Skal's fdct at http://skal.planet-d.net/coding/dct.html
*/
#include "../common.h"
#include "mmx.h"
...
...
@@ -27,10 +32,8 @@
/* Fixed-point scaling parameters of the forward DCT. */
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
/* Column-pass shift: same as BITS_FRW_ACC, i.e. 3. */
#define SHIFT_FRW_COL BITS_FRW_ACC
/* Row-pass shift: 3 + 17 - 3 = 17; used as the psrad count in fdct_row_sse2. */
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1)
/* Row-pass rounding term: 1 << 16 = 65536, half of 1 << SHIFT_FRW_ROW.
 * It fills the fdct_r_row and fdct_r_row_sse2 tables. */
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1)
/* Column-pass rounding term: 1 << 2 = 4. */
#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1))
//concatenated table, for forward DCT transformation
static
const
int16_t
fdct_tg_all_16
[]
ATTR_ALIGN
(
8
)
=
{
...
...
@@ -38,17 +41,17 @@ static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
27146
,
27146
,
27146
,
27146
,
// tg * (2<<16) + 0.5
-
21746
,
-
21746
,
-
21746
,
-
21746
,
// tg * (2<<16) + 0.5
};
/*
 * Four identical 16-bit lanes, 8-byte aligned.
 * The value matches cos(pi/4) * 2^16 + 0.5 = 46341, which wraps to -19195
 * when stored in an int16_t.
 */
static const int16_t cos_4_16[4] ATTR_ALIGN(8) = {
    -19195, -19195, -19195, -19195,
};
/*
 * Four identical 16-bit lanes, 8-byte aligned.
 * The value matches cos(pi/4) * 2^15 + 0.5 truncated to an integer (23170).
 */
static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
    23170, 23170, 23170, 23170,
};
/*
 * Four packed 16-bit lanes, each holding 1, in one 8-byte aligned 64-bit word.
 * Defined exactly once: a second definition with an initializer is a
 * redefinition error in C.
 */
static const long long fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
/*
 * Row-pass rounding term (RND_FRW_ROW) replicated into two 32-bit lanes,
 * 8-byte aligned so the pair can be used as one 64-bit packed operand.
 *
 * The element type is int32_t rather than long: long is 8 bytes on LP64
 * targets, which would turn the table into { RND, 0 } when read as packed
 * 32-bit lanes. On ILP32 targets the layout is unchanged.
 * NOTE(review): the users of this table are outside the visible code;
 * presumably they read it as a packed dword operand - confirm.
 */
static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {
    RND_FRW_ROW, RND_FRW_ROW
};
/*
 * Row-pass rounding term (RND_FRW_ROW) replicated into four 32-bit lanes.
 * fdct_row_sse2 loads all 16 bytes with movdqa into xmm6 and adds them with
 * paddd, so the table must be 16-byte aligned and exactly 4 x 32 bits.
 *
 * The element type is int32_t rather than long: long is 8 bytes on LP64
 * targets, where the first 16 bytes would read as { RND, 0, RND, 0 } and
 * half of the lanes would lose their rounding. On ILP32 targets the layout
 * is unchanged.
 */
static const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16) = {
    RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW
};
static
const
int16_t
tab_frw_01234567
[]
ATTR_ALIGN
(
8
)
=
{
// forward_dct coeff table
16384
,
16384
,
-
8867
,
-
21407
,
16384
,
16384
,
21407
,
8867
,
...
...
@@ -123,6 +126,133 @@ static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff
6270
,
26722
,
6270
,
-
17855
,
};
/*
 * Coefficient table for the SSE2 row pass (fdct_row_sse2), 16-byte aligned
 * because that function reads it with movdqa.
 *
 * TABLE_SSE2 expands to 32 entries (64 bytes) written in terms of C1..C7.
 * It is expanded eight times, each time with a different C1..C7 set.
 * Expansions 5 to 8 repeat the constant sets of expansions 1, 4, 3 and 2.
 *
 * The fdct_row_sse2 code visible in this file only addresses table byte
 * offsets 0, 64, 128 and 192 (plus 0..63), i.e. the first four expansions.
 *
 * NOTE(review): TABLE_SSE2 and the last C1..C7 set are never undefined, so
 * they stay defined for the rest of the translation unit - confirm that
 * nothing later depends on or collides with them.
 */
static
const
int16_t
tab_frw_01234567_sse2
[]
ATTR_ALIGN
(
16
)
=
{
// forward_dct coeff table
#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \
C4, C4, C5, C7, C2, C6, C3, -C7, \
-C4, C4, C7, C3, C6, -C2, C7, -C5, \
C4, -C4, C5, -C1, C2, -C6, C3, -C1,
// c1..c7 * cos(pi/4) * 2^15
// expansion 1, table byte offset 0: used by fdct_row_sse2 for row byte offsets 0 and 64
#define C1 22725
#define C2 21407
#define C3 19266
#define C4 16384
#define C5 12873
#define C6 8867
#define C7 4520
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
// expansion 2, table byte offset 64: used for row byte offsets 16 and 112
#define C1 31521
#define C2 29692
#define C3 26722
#define C4 22725
#define C5 17855
#define C6 12299
#define C7 6270
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
// expansion 3, table byte offset 128: used for row byte offsets 32 and 96
#define C1 29692
#define C2 27969
#define C3 25172
#define C4 21407
#define C5 16819
#define C6 11585
#define C7 5906
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
// expansion 4, table byte offset 192: used for row byte offsets 48 and 80
#define C1 26722
#define C2 25172
#define C3 22654
#define C4 19266
#define C5 15137
#define C6 10426
#define C7 5315
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
// expansions 5 to 8 (byte offsets 256 and up) are not addressed by the visible fdct_row_sse2
#define C1 22725
#define C2 21407
#define C3 19266
#define C4 16384
#define C5 12873
#define C6 8867
#define C7 4520
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 26722
#define C2 25172
#define C3 22654
#define C4 19266
#define C5 15137
#define C6 10426
#define C7 5315
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 29692
#define C2 27969
#define C3 25172
#define C4 21407
#define C5 16819
#define C6 11585
#define C7 5906
TABLE_SSE2
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#define C1 31521
#define C2 29692
#define C3 26722
#define C4 22725
#define C5 17855
#define C6 12299
#define C7 6270
TABLE_SSE2
};
static
always_inline
void
fdct_col
(
const
int16_t
*
in
,
int16_t
*
out
,
int
offset
)
{
...
...
@@ -203,6 +333,69 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
movq_r2m
(
mm3
,
*
(
out
+
offset
+
7
*
8
));
}
/*
 * SSE2 row pass of the forward DCT.
 *
 * in  : source rows, read 8 bytes at a time with movq (asm operand %0).
 * out : destination, written 16 bytes at a time with movdqa (asm operand %4),
 *       so the address has to be 16-byte aligned.
 *
 * Other asm operands: %1 = tab_frw_01234567_sse2, %2 = fdct_r_row_sse2
 * (rounding terms, kept in xmm6 for the whole function), %3 = SHIFT_FRW_ROW
 * (immediate shift count).
 *
 * Rows are handled in pairs that share one coefficient table. Row byte
 * offsets 0/64, 16/112, 32/96 and 48/80 use table byte offsets 0, 64, 128
 * and 192 respectively.
 *
 * NOTE(review): the asm statement declares no outputs and no clobbers even
 * though it stores through out and overwrites xmm0-xmm7 - confirm whether a
 * memory clobber (and register clobbers) should be added.
 * NOTE(review): the .macro definitions are emitted wherever this function is
 * expanded; a second expansion in the same translation unit would presumably
 * make the assembler reject the redefinition - confirm.
 */
static
always_inline
void
fdct_row_sse2
(
const
int16_t
*
in
,
int16_t
*
out
)
{
asm
volatile
(
/* FDCT_ROW_SSE2_H1 i t: load the two 8-byte halves of the row at in+i into
 * xmm2 and xmm0, and the four 16-byte table chunks at t+32, t+48, t, t+16
 * into xmm3, xmm7, xmm4, xmm5. */
".macro FDCT_ROW_SSE2_H1 i t
\n\t
"
"movq
\\
i(%0), %%xmm2
\n\t
"
"movq
\\
i+8(%0), %%xmm0
\n\t
"
"movdqa
\\
t+32(%1), %%xmm3
\n\t
"
"movdqa
\\
t+48(%1), %%xmm7
\n\t
"
"movdqa
\\
t(%1), %%xmm4
\n\t
"
"movdqa
\\
t+16(%1), %%xmm5
\n\t
"
".endm
\n\t
"
/* FDCT_ROW_SSE2_H2 i t: same row loads, but only xmm3 and xmm7 are reloaded.
 * They are overwritten as pmaddwd destinations, while xmm4 and xmm5 are only
 * read and so are still valid from the preceding H1 with the same t. */
".macro FDCT_ROW_SSE2_H2 i t
\n\t
"
"movq
\\
i(%0), %%xmm2
\n\t
"
"movq
\\
i+8(%0), %%xmm0
\n\t
"
"movdqa
\\
t+32(%1), %%xmm3
\n\t
"
"movdqa
\\
t+48(%1), %%xmm7
\n\t
"
".endm
\n\t
"
/* FDCT_ROW_SSE2 i: transform the row held in xmm2/xmm0 and store the eight
 * 16-bit results at out+i. pshuflw $27 reverses the four low words of xmm0,
 * pshufd $78 swaps the two 64-bit halves. The products are summed, rounded
 * with xmm6, shifted right by %3 and packed with signed saturation. */
".macro FDCT_ROW_SSE2 i
\n\t
"
"movq %%xmm2, %%xmm1
\n\t
"
"pshuflw $27, %%xmm0, %%xmm0
\n\t
"
"paddsw %%xmm0, %%xmm1
\n\t
"
"psubsw %%xmm0, %%xmm2
\n\t
"
"punpckldq %%xmm2, %%xmm1
\n\t
"
"pshufd $78, %%xmm1, %%xmm2
\n\t
"
"pmaddwd %%xmm2, %%xmm3
\n\t
"
"pmaddwd %%xmm1, %%xmm7
\n\t
"
"pmaddwd %%xmm5, %%xmm2
\n\t
"
"pmaddwd %%xmm4, %%xmm1
\n\t
"
"paddd %%xmm7, %%xmm3
\n\t
"
"paddd %%xmm2, %%xmm1
\n\t
"
"paddd %%xmm6, %%xmm3
\n\t
"
"paddd %%xmm6, %%xmm1
\n\t
"
"psrad %3, %%xmm3
\n\t
"
"psrad %3, %%xmm1
\n\t
"
"packssdw %%xmm3, %%xmm1
\n\t
"
"movdqa %%xmm1,
\\
i(%4)
\n\t
"
".endm
\n\t
"
/* Load the four rounding terms once; xmm6 is only read afterwards. */
"movdqa (%2), %%xmm6
\n\t
"
/* Row byte offsets 0 and 64 share table offset 0. */
"FDCT_ROW_SSE2_H1 0 0
\n\t
"
"FDCT_ROW_SSE2 0
\n\t
"
"FDCT_ROW_SSE2_H2 64 0
\n\t
"
"FDCT_ROW_SSE2 64
\n\t
"
/* Row byte offsets 16 and 112 share table offset 64. */
"FDCT_ROW_SSE2_H1 16 64
\n\t
"
"FDCT_ROW_SSE2 16
\n\t
"
"FDCT_ROW_SSE2_H2 112 64
\n\t
"
"FDCT_ROW_SSE2 112
\n\t
"
/* Row byte offsets 32 and 96 share table offset 128. */
"FDCT_ROW_SSE2_H1 32 128
\n\t
"
"FDCT_ROW_SSE2 32
\n\t
"
"FDCT_ROW_SSE2_H2 96 128
\n\t
"
"FDCT_ROW_SSE2 96
\n\t
"
/* Row byte offsets 48 and 80 share table offset 192. */
"FDCT_ROW_SSE2_H1 48 192
\n\t
"
"FDCT_ROW_SSE2 48
\n\t
"
"FDCT_ROW_SSE2_H2 80 192
\n\t
"
"FDCT_ROW_SSE2 80
\n\t
"
:
:
"r"
(
in
),
"r"
(
tab_frw_01234567_sse2
),
"r"
(
fdct_r_row_sse2
),
"i"
(
SHIFT_FRW_ROW
),
"r"
(
out
)
);
}
static
always_inline
void
fdct_row_mmx2
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
)
{
pshufw_m2r
(
*
(
in
+
4
),
mm5
,
0x1B
);
...
...
@@ -341,3 +534,18 @@ void ff_fdct_mmx2(int16_t *block)
out
+=
8
;
}
}
/**
 * Forward DCT of one coefficient block, done in place.
 *
 * The column pass (fdct_col, called for column offsets 0 and 4) writes into
 * a local scratch buffer; the SSE2 row pass (fdct_row_sse2) then writes the
 * final result back into block.
 *
 * @param block 64 coefficients, overwritten with the result. fdct_row_sse2
 *              stores into it with movdqa, so it must be 16-byte aligned.
 */
void ff_fdct_sse2(int16_t *block)
{
    /* Scratch for the column-pass output: 16 * 8 bytes = 64 int16_t values.
     * In the code visible here it is only touched with 8-byte movq accesses,
     * so 8-byte alignment is sufficient. */
    int64_t align_tmp[16] ATTR_ALIGN(8);
    int16_t *const block_tmp = (int16_t *)align_tmp;

    fdct_col(block, block_tmp, 0);
    fdct_col(block, block_tmp, 4);

    fdct_row_sse2(block_tmp, block);
}
libavcodec/i386/mpegvideo_mmx.c
浏览文件 @
8fd19ab2
...
...
@@ -683,6 +683,12 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
/* Template instantiation: mpegvideo_mmx_template.c is included once per
 * RENAME / RENAMEl suffix pair. The template itself is not visible here. */
#define RENAMEl(a) a ## _mmx2
#include "mpegvideo_mmx_template.c"
/* Drop the previous suffix macros before defining the SSE2 pair. */
#undef RENAME
#undef RENAMEl
/* SSE2 instantiation: RENAME appends _SSE2, RENAMEl appends _sse2.
 * Presumably this is what provides dct_quantize_SSE2, which
 * MPV_common_init_mmx assigns - confirm against the template. */
#define RENAME(a) a ## _SSE2
#define RENAMEl(a) a ## _sse2
#include "mpegvideo_mmx_template.c"
void
MPV_common_init_mmx
(
MpegEncContext
*
s
)
{
if
(
mm_flags
&
MM_MMX
)
{
...
...
@@ -704,7 +710,9 @@ void MPV_common_init_mmx(MpegEncContext *s)
}
if
(
dct_algo
==
FF_DCT_AUTO
||
dct_algo
==
FF_DCT_MMX
){
if
(
mm_flags
&
MM_MMXEXT
){
if
(
mm_flags
&
MM_SSE2
){
s
->
dct_quantize
=
dct_quantize_SSE2
;
}
else
if
(
mm_flags
&
MM_MMXEXT
){
s
->
dct_quantize
=
dct_quantize_MMX2
;
}
else
{
s
->
dct_quantize
=
dct_quantize_MMX
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录