Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
3072f0cb
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3072f0cb
编写于
5月 25, 2005
作者:
Z
Zoltán Hidvégi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
MMX code for (put|avg)_h264_chroma_mc8
Originally committed as revision 4305 to
svn://svn.ffmpeg.org/ffmpeg/trunk
上级
9c7154c7
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
168 additions
and
0 deletions
+168
-0
libavcodec/i386/dsputil_h264_template_mmx.c
libavcodec/i386/dsputil_h264_template_mmx.c
+140
-0
libavcodec/i386/dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+28
-0
未找到文件。
libavcodec/i386/dsputil_h264_template_mmx.c
0 → 100644
浏览文件 @
3072f0cb
/*
* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/**
 * MMX optimized version of (put|avg)_h264_chroma_mc8.
 *
 * This file is a template. Before including it, the including file must
 * define:
 *  - H264_CHROMA_MC8_TMPL   the name of the function to generate;
 *  - H264_CHROMA_OP(S, D)   a string-literal asm fragment (or nothing) that is
 *                           pasted right before the final store, with S being
 *                           the destination memory operand and D the MMX
 *                           register holding the result: empty for put,
 *                           a pavgb/pavgusb instruction for avg.
 * It also uses the 64-bit constants sixtyfour and thirtytwo, which must be in
 * scope at the point of inclusion.
 *
 * Every output row is the 8-pixel wide weighted sum
 *   (A*src[j] + B*src[j+1] + C*src[j+stride] + D*src[j+stride+1] + 32) >> 6
 * with A = xy - (8x+8y) + 64, B = 8x - xy, C = 8y - xy, D = xy.
 *
 * @param dst    destination; 8 bytes are stored per row (align 8)
 * @param src    source (align 1); rows 0..h are read
 * @param stride byte step applied to both src and dst after each row
 * @param h      number of rows to produce
 * @param x      horizontal weight, checked by assert() to be in 0..7
 * @param y      vertical weight, checked by assert() to be in 0..7
 *
 * The asm statements below hand values to each other through MMX registers
 * (mm5 = B, mm6 = C, mm7 = 0, mm0/mm1 = current source row), so their order
 * must not be changed. No emms is issued in this function.
 */
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    uint64_t AA __align8;
    uint64_t DD __align8;
    /* Byte offset of src inside its aligned 8-byte group. */
    unsigned long srcos = (long)src & 7;
    /* Bit shift counts used to rebuild the unaligned row from two aligned
     * loads; kept in memory because psrlq/psllq take them as "m" operands. */
    uint64_t sh1 __align8 = srcos * 8;
    uint64_t sh2 __align8 = 56 - sh1;
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    /* Broadcast x and y to four 16-bit lanes and derive the four weights. */
    asm volatile("movd %1, %%mm4\n\t"
                 "movd %2, %%mm6\n\t"
                 "punpcklwd %%mm4, %%mm4\n\t"
                 "punpcklwd %%mm6, %%mm6\n\t"
                 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
                 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
                 "movq %%mm4, %%mm5\n\t"
                 "pmullw %%mm6, %%mm4\n\t"    /* mm4 = x * y */
                 "psllw $3, %%mm5\n\t"
                 "psllw $3, %%mm6\n\t"
                 "movq %%mm5, %%mm7\n\t"
                 "paddw %%mm6, %%mm7\n\t"
                 "movq %%mm4, %0\n\t"         /* DD = x * y */
                 "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
                 "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
                 "paddw %3, %%mm4\n\t"
                 "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
                 "pxor %%mm7, %%mm7\n\t"      /* mm7 = 0, used for byte->word unpacking */
                 : "=m" (DD) : "rm" (x), "rm" (y), "m" (sixtyfour));

    /* A is spilled because mm4 is reused as scratch from here on. */
    asm volatile("movq %%mm4, %0" : "=m" (AA));

    /* Round src down to its 8-byte group; sh1/sh2 compensate for the offset.
     * NOTE(review): the operands are uint8_t lvalues although movq accesses
     * 8 bytes at each address, so the loads cover src[0..15] of the aligned
     * pointer. */
    src -= srcos;
    asm volatile(
        /* mm0 = src[0..7], mm1 = src[1..8] */
        "movq %0, %%mm1\n\t"
        "movq %1, %%mm0\n\t"
        "psrlq %2, %%mm1\n\t"
        "psllq %3, %%mm0\n\t"
        "movq %%mm0, %%mm4\n\t"
        "psllq $8, %%mm0\n\t"
        "por %%mm1, %%mm0\n\t"
        "psrlq $8, %%mm1\n\t"
        "por %%mm4, %%mm1\n\t"
        : : "m" (src[0]), "m" (src[8]), "m" (sh1), "m" (sh2));

    for (i = 0; i < h; i++) {
        /* Contribution of the current row (already in mm0/mm1). */
        asm volatile(
            /* [mm2,mm3] = A * src[0..7] */
            "movq %%mm0, %%mm2\n\t"
            "punpcklbw %%mm7, %%mm2\n\t"
            "pmullw %0, %%mm2\n\t"
            "movq %%mm0, %%mm3\n\t"
            "punpckhbw %%mm7, %%mm3\n\t"
            "pmullw %0, %%mm3\n\t"

            /* [mm2,mm3] += B * src[1..8] */
            "movq %%mm1, %%mm0\n\t"
            "punpcklbw %%mm7, %%mm0\n\t"
            "pmullw %%mm5, %%mm0\n\t"
            "punpckhbw %%mm7, %%mm1\n\t"
            "pmullw %%mm5, %%mm1\n\t"
            "paddw %%mm0, %%mm2\n\t"
            "paddw %%mm1, %%mm3\n\t"
            : : "m" (AA));

        /* Load the next row; it stays in mm0/mm1 and becomes the "current"
         * row of the following iteration. */
        src += stride;
        asm volatile(
            /* mm0 = src[0..7], mm1 = src[1..8] */
            "movq %0, %%mm1\n\t"
            "movq %1, %%mm0\n\t"
            "psrlq %2, %%mm1\n\t"
            "psllq %3, %%mm0\n\t"
            "movq %%mm0, %%mm4\n\t"
            "psllq $8, %%mm0\n\t"
            "por %%mm1, %%mm0\n\t"
            "psrlq $8, %%mm1\n\t"
            "por %%mm4, %%mm1\n\t"
            : : "m" (src[0]), "m" (src[8]), "m" (sh1), "m" (sh2));

        /* This statement has no operand list (basic asm), which is why the
         * register names carry a single '%' here. */
        asm volatile(
            /* [mm2,mm3] += C * src[0..7] */
            "movq %mm0, %mm4\n\t"
            "punpcklbw %mm7, %mm4\n\t"
            "pmullw %mm6, %mm4\n\t"
            "paddw %mm4, %mm2\n\t"
            "movq %mm0, %mm4\n\t"
            "punpckhbw %mm7, %mm4\n\t"
            "pmullw %mm6, %mm4\n\t"
            "paddw %mm4, %mm3\n\t"
            );

        asm volatile(
            /* [mm2,mm3] += D * src[1..8] */
            "movq %%mm1, %%mm4\n\t"
            "punpcklbw %%mm7, %%mm4\n\t"
            "pmullw %0, %%mm4\n\t"
            "paddw %%mm4, %%mm2\n\t"
            "movq %%mm1, %%mm4\n\t"
            "punpckhbw %%mm7, %%mm4\n\t"
            "pmullw %0, %%mm4\n\t"
            "paddw %%mm4, %%mm3\n\t"
            : : "m" (DD));

        /* dst is declared read-write ("+m"): the avg expansions of
         * H264_CHROMA_OP read %0 before the movq overwrites it. */
        asm volatile(
            /* dst[0..7] = pack(([mm2,mm3] + 32) >> 6) */
            "paddw %1, %%mm2\n\t"
            "paddw %1, %%mm3\n\t"
            "psrlw $6, %%mm2\n\t"
            "psrlw $6, %%mm3\n\t"
            "packuswb %%mm3, %%mm2\n\t"
            H264_CHROMA_OP(%0, %%mm2)
            "movq %%mm2, %0\n\t"
            : "+m" (dst[0]) : "m" (thirtytwo));
        dst += stride;
    }
}
libavcodec/i386/dsputil_mmx.c
浏览文件 @
3072f0cb
...
...
@@ -3078,6 +3078,30 @@ H264_MC(avg_, 4, mmx2)
/* H264_MC is defined outside this excerpt; it is invoked here for the
 * avg_ prefix with sizes 8 and 16 and the mmx2 suffix. */
H264_MC(avg_, 8, mmx2)
H264_MC(avg_, 16, mmx2)
/**
 * Packed constants used by *_h264_chroma_mc8_*: each holds the same value in
 * all four 16-bit words (64 and 32 respectively). The template adds
 * sixtyfour when forming the A weight and thirtytwo before the >> 6.
 */
static const uint64_t sixtyfour __align8 = 0x0040004000400040ULL;
static const uint64_t thirtytwo __align8 = 0x0020002000200020ULL;
/*
 * Generate three functions from dsputil_h264_template_mmx.c. For each one,
 * both template macros are defined, the template is included, and the macros
 * are undefined again so the next instantiation can redefine them.
 */

/* put: H264_CHROMA_OP expands to nothing, so the result is stored as is. */
#define H264_CHROMA_OP(S,D)
#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx
#include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP
#undef H264_CHROMA_MC8_TMPL
/* avg (mmx2): H264_CHROMA_OP stringifies its arguments into a
 * "pavgb S, D" instruction placed before the store. */
#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2
#include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP
#undef H264_CHROMA_MC8_TMPL
/* avg (3dnow): same as above but using the "pavgusb S, D" instruction. */
#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow
#include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP
#undef H264_CHROMA_MC8_TMPL
#if 0
/* Does nothing and returns. Declared with (void) so it has a real prototype;
 * an empty parameter list leaves the arguments unchecked before C23. */
static void just_return(void) { return; }
#endif
...
...
@@ -3377,6 +3401,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
h263_v_loop_filter
=
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
h263_h_loop_filter_mmx
;
c
->
put_h264_chroma_pixels_tab
[
0
]
=
put_h264_chroma_mc8_mmx
;
if
(
mm_flags
&
MM_MMXEXT
)
{
c
->
put_pixels_tab
[
0
][
1
]
=
put_pixels16_x2_mmx2
;
...
...
@@ -3473,6 +3498,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dspfunc
(
avg_h264_qpel
,
2
,
4
);
#undef dspfunc
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
avg_h264_chroma_mc8_mmx2
;
c
->
h264_v_loop_filter_luma
=
h264_v_loop_filter_luma_mmx2
;
c
->
h264_h_loop_filter_luma
=
h264_h_loop_filter_luma_mmx2
;
c
->
h264_v_loop_filter_chroma
=
h264_v_loop_filter_chroma_mmx2
;
...
...
@@ -3564,6 +3590,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dspfunc
(
avg_h264_qpel
,
0
,
16
);
dspfunc
(
avg_h264_qpel
,
1
,
8
);
dspfunc
(
avg_h264_qpel
,
2
,
4
);
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
avg_h264_chroma_mc8_3dnow
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录