Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
ef9d1d15
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ef9d1d15
编写于
2月 10, 2006
作者:
L
Loren Merritt
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
h264: special case dc-only idct. ~1% faster overall
Originally committed as revision 4971 to
svn://svn.ffmpeg.org/ffmpeg/trunk
上级
a283db39
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
173 additions
and
38 deletions
+173
-38
libavcodec/dsputil.c
libavcodec/dsputil.c
+2
-0
libavcodec/dsputil.h
libavcodec/dsputil.h
+4
-0
libavcodec/h264.c
libavcodec/h264.c
+59
-38
libavcodec/h264idct.c
libavcodec/h264idct.c
+25
-0
libavcodec/i386/dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+2
-0
libavcodec/i386/h264dsp_mmx.c
libavcodec/i386/h264dsp_mmx.c
+81
-0
未找到文件。
libavcodec/dsputil.c
浏览文件 @
ef9d1d15
...
...
@@ -3851,6 +3851,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
h264_idct_add
=
ff_h264_idct_add_c
;
c
->
h264_idct8_add
=
ff_h264_idct8_add_c
;
c
->
h264_idct_dc_add
=
ff_h264_idct_dc_add_c
;
c
->
h264_idct8_dc_add
=
ff_h264_idct8_dc_add_c
;
c
->
get_pixels
=
get_pixels_c
;
c
->
diff_pixels
=
diff_pixels_c
;
...
...
libavcodec/dsputil.h
浏览文件 @
ef9d1d15
...
...
@@ -52,6 +52,8 @@ void ff_fdct_sse2(DCTELEM *block);
void
ff_h264_idct8_add_c
(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
ff_h264_idct_add_c
(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
ff_h264_idct8_dc_add_c
(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
ff_h264_idct_dc_add_c
(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
ff_h264_lowres_idct_add_c
(
uint8_t
*
dst
,
int
stride
,
DCTELEM
*
block
);
void
ff_h264_lowres_idct_put_c
(
uint8_t
*
dst
,
int
stride
,
DCTELEM
*
block
);
...
...
@@ -330,6 +332,8 @@ typedef struct DSPContext {
void
(
*
h264_idct_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
(
*
h264_idct8_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
(
*
h264_idct_dc_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
(
*
h264_idct8_dc_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
}
DSPContext
;
void
dsputil_static_init
(
void
);
...
...
libavcodec/h264.c
浏览文件 @
ef9d1d15
...
...
@@ -3314,6 +3314,7 @@ static void hl_decode_mb(H264Context *h){
const
unsigned
int
bottom
=
mb_y
&
1
;
const
int
transform_bypass
=
(
s
->
qscale
==
0
&&
h
->
sps
.
transform_bypass
);
void
(
*
idct_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
void
(
*
idct_dc_add
)(
uint8_t
*
dst
,
DCTELEM
*
block
,
int
stride
);
if
(
!
s
->
decode
)
return
;
...
...
@@ -3337,9 +3338,16 @@ static void hl_decode_mb(H264Context *h){
// dct_offset = s->linesize * 16;
}
idct_add
=
transform_bypass
?
IS_8x8DCT
(
mb_type
)
?
s
->
dsp
.
add_pixels8
:
s
->
dsp
.
add_pixels4
:
IS_8x8DCT
(
mb_type
)
?
s
->
dsp
.
h264_idct8_add
:
s
->
dsp
.
h264_idct_add
;
if
(
transform_bypass
){
idct_dc_add
=
idct_add
=
IS_8x8DCT
(
mb_type
)
?
s
->
dsp
.
add_pixels8
:
s
->
dsp
.
add_pixels4
;
}
else
if
(
IS_8x8DCT
(
mb_type
)){
idct_dc_add
=
s
->
dsp
.
h264_idct8_dc_add
;
idct_add
=
s
->
dsp
.
h264_idct8_add
;
}
else
{
idct_dc_add
=
s
->
dsp
.
h264_idct_dc_add
;
idct_add
=
s
->
dsp
.
h264_idct_add
;
}
if
(
IS_INTRA_PCM
(
mb_type
))
{
unsigned
int
x
,
y
;
...
...
@@ -3389,17 +3397,22 @@ static void hl_decode_mb(H264Context *h){
for
(
i
=
0
;
i
<
16
;
i
+=
4
){
uint8_t
*
const
ptr
=
dest_y
+
block_offset
[
i
];
const
int
dir
=
h
->
intra4x4_pred_mode_cache
[
scan8
[
i
]
];
const
int
nnz
=
h
->
non_zero_count_cache
[
scan8
[
i
]
];
h
->
pred8x8l
[
dir
](
ptr
,
(
h
->
topleft_samples_available
<<
i
)
&
0x8000
,
(
h
->
topright_samples_available
<<
(
i
+
1
))
&
0x8000
,
linesize
);
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
])
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
if
(
nnz
){
if
(
nnz
==
1
&&
h
->
mb
[
i
*
16
])
idct_dc_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
else
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
}
}
}
else
for
(
i
=
0
;
i
<
16
;
i
++
){
uint8_t
*
const
ptr
=
dest_y
+
block_offset
[
i
];
uint8_t
*
topright
;
const
int
dir
=
h
->
intra4x4_pred_mode_cache
[
scan8
[
i
]
];
int
tr
;
int
nnz
,
tr
;
if
(
dir
==
DIAG_DOWN_LEFT_PRED
||
dir
==
VERT_LEFT_PRED
){
const
int
topright_avail
=
(
h
->
topright_samples_available
<<
i
)
&
0x8000
;
...
...
@@ -3413,10 +3426,14 @@ static void hl_decode_mb(H264Context *h){
topright
=
NULL
;
h
->
pred4x4
[
dir
](
ptr
,
topright
,
linesize
);
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]){
if
(
s
->
codec_id
==
CODEC_ID_H264
)
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
else
nnz
=
h
->
non_zero_count_cache
[
scan8
[
i
]
];
if
(
nnz
){
if
(
s
->
codec_id
==
CODEC_ID_H264
){
if
(
nnz
==
1
&&
h
->
mb
[
i
*
16
])
idct_dc_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
else
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
}
else
svq3_add_idct_c
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
,
s
->
qscale
,
0
);
}
}
...
...
@@ -3453,11 +3470,23 @@ static void hl_decode_mb(H264Context *h){
if
(
!
IS_INTRA4x4
(
mb_type
)){
if
(
s
->
codec_id
==
CODEC_ID_H264
){
const
int
di
=
IS_8x8DCT
(
mb_type
)
?
4
:
1
;
for
(
i
=
0
;
i
<
16
;
i
+=
di
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]
||
h
->
mb
[
i
*
16
]){
//FIXME benchmark weird rule, & below
uint8_t
*
const
ptr
=
dest_y
+
block_offset
[
i
];
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
linesize
);
if
(
IS_INTRA16x16
(
mb_type
)){
for
(
i
=
0
;
i
<
16
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
])
idct_add
(
dest_y
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
linesize
);
else
if
(
h
->
mb
[
i
*
16
])
idct_dc_add
(
dest_y
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
linesize
);
}
}
else
{
const
int
di
=
IS_8x8DCT
(
mb_type
)
?
4
:
1
;
for
(
i
=
0
;
i
<
16
;
i
+=
di
){
int
nnz
=
h
->
non_zero_count_cache
[
scan8
[
i
]
];
if
(
nnz
){
if
(
nnz
==
1
&&
h
->
mb
[
i
*
16
])
idct_dc_add
(
dest_y
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
linesize
);
else
idct_add
(
dest_y
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
linesize
);
}
}
}
}
else
{
...
...
@@ -3471,34 +3500,26 @@ static void hl_decode_mb(H264Context *h){
}
if
(
!
(
s
->
flags
&
CODEC_FLAG_GRAY
)){
idct_add
=
transform_bypass
?
s
->
dsp
.
add_pixels4
:
s
->
dsp
.
h264_idct_add
;
if
(
!
transform_bypass
){
uint8_t
*
dest
[
2
]
=
{
dest_cb
,
dest_cr
};
if
(
transform_bypass
){
idct_add
=
idct_dc_add
=
s
->
dsp
.
add_pixels4
;
}
else
{
idct_add
=
s
->
dsp
.
h264_idct_add
;
idct_dc_add
=
s
->
dsp
.
h264_idct_dc_add
;
chroma_dc_dequant_idct_c
(
h
->
mb
+
16
*
16
,
h
->
chroma_qp
,
h
->
dequant4_coeff
[
IS_INTRA
(
mb_type
)
?
1
:
4
][
h
->
chroma_qp
][
0
]);
chroma_dc_dequant_idct_c
(
h
->
mb
+
16
*
16
+
4
*
16
,
h
->
chroma_qp
,
h
->
dequant4_coeff
[
IS_INTRA
(
mb_type
)
?
2
:
5
][
h
->
chroma_qp
][
0
]);
}
if
(
s
->
codec_id
==
CODEC_ID_H264
){
for
(
i
=
16
;
i
<
16
+
4
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]
||
h
->
mb
[
i
*
16
]){
uint8_t
*
const
ptr
=
dest_cb
+
block_offset
[
i
];
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
uvlinesize
);
}
}
for
(
i
=
20
;
i
<
20
+
4
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]
||
h
->
mb
[
i
*
16
]){
uint8_t
*
const
ptr
=
dest_cr
+
block_offset
[
i
];
idct_add
(
ptr
,
h
->
mb
+
i
*
16
,
uvlinesize
);
}
for
(
i
=
16
;
i
<
16
+
8
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
])
idct_add
(
dest
[(
i
&
4
)
>>
2
]
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
uvlinesize
);
else
if
(
h
->
mb
[
i
*
16
])
idct_dc_add
(
dest
[(
i
&
4
)
>>
2
]
+
block_offset
[
i
],
h
->
mb
+
i
*
16
,
uvlinesize
);
}
}
else
{
for
(
i
=
16
;
i
<
16
+
4
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]
||
h
->
mb
[
i
*
16
]){
uint8_t
*
const
ptr
=
dest_cb
+
block_offset
[
i
];
svq3_add_idct_c
(
ptr
,
h
->
mb
+
i
*
16
,
uvlinesize
,
chroma_qp
[
s
->
qscale
+
12
]
-
12
,
2
);
}
}
for
(
i
=
20
;
i
<
20
+
4
;
i
++
){
for
(
i
=
16
;
i
<
16
+
8
;
i
++
){
if
(
h
->
non_zero_count_cache
[
scan8
[
i
]
]
||
h
->
mb
[
i
*
16
]){
uint8_t
*
const
ptr
=
dest
_cr
+
block_offset
[
i
];
uint8_t
*
const
ptr
=
dest
[(
i
&
4
)
>>
2
]
+
block_offset
[
i
];
svq3_add_idct_c
(
ptr
,
h
->
mb
+
i
*
16
,
uvlinesize
,
chroma_qp
[
s
->
qscale
+
12
]
-
12
,
2
);
}
}
...
...
@@ -5131,7 +5152,7 @@ decode_intra_mb:
return
-
1
;
}
nnz
=
&
h
->
non_zero_count_cache
[
scan8
[
4
*
i8x8
]
];
nnz
[
0
]
|=
nnz
[
1
]
|
nnz
[
8
]
|
nnz
[
9
];
nnz
[
0
]
+=
nnz
[
1
]
+
nnz
[
8
]
+
nnz
[
9
];
}
else
{
for
(
i4x4
=
0
;
i4x4
<
4
;
i4x4
++
){
const
int
index
=
i4x4
+
4
*
i8x8
;
...
...
@@ -5690,7 +5711,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
h
->
non_zero_count_cache
[
scan8
[
16
+
n
]]
=
coeff_count
;
else
{
assert
(
cat
==
5
);
fill_rectangle
(
&
h
->
non_zero_count_cache
[
scan8
[
n
]],
2
,
2
,
8
,
1
,
1
);
fill_rectangle
(
&
h
->
non_zero_count_cache
[
scan8
[
n
]],
2
,
2
,
8
,
coeff_count
,
1
);
}
for
(
i
=
coeff_count
-
1
;
i
>=
0
;
i
--
)
{
...
...
libavcodec/h264idct.c
浏览文件 @
ef9d1d15
...
...
@@ -139,3 +139,28 @@ void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
dst
[
i
+
7
*
stride
]
=
cm
[
dst
[
i
+
7
*
stride
]
+
((
b0
-
b7
)
>>
6
)
];
}
}
// assumes all AC coefs are 0
/**
 * Add a DC-only residual to a 4x4 area of pixels.
 *
 * Only block[0] is read; the caller is expected to use this when every
 * other coefficient of the block is zero.
 *
 * @param dst    top-left pixel of the 4x4 area, updated in place
 * @param block  coefficient block; only element 0 is used
 * @param stride distance between the starts of consecutive rows of dst
 */
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride)
{
    /* Lookup table indexed by (pixel + offset); presumably clamps the sum
     * to the 0..255 range -- cropTbl is defined outside this function. */
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    /* Rounded DC term: add half of 64, then divide by 64 via the shift. */
    int offset = (block[0] + 32) >> 6;
    uint8_t *line = dst;
    int row;

    for (row = 0; row < 4; row++) {
        int col = 0;

        while (col < 4) {
            line[col] = clip[line[col] + offset];
            col++;
        }
        line += stride;
    }
}
/**
 * Add a DC-only residual to an 8x8 area of pixels.
 *
 * Only block[0] is read; the same rounded value is added to each of the
 * 64 pixels, with the result taken from the cropTbl lookup table.
 *
 * @param dst    top-left pixel of the 8x8 area, updated in place
 * @param block  coefficient block; only element 0 is used
 * @param stride distance between the starts of consecutive rows of dst
 */
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride)
{
    /* Lookup table indexed by (pixel + offset); presumably clamps the sum
     * to the 0..255 range -- cropTbl is defined outside this function. */
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    /* Rounded DC term: add half of 64, then divide by 64 via the shift. */
    int offset = (block[0] + 32) >> 6;
    uint8_t *line = dst;
    int rows_left = 8;

    while (rows_left-- > 0) {
        int col;

        for (col = 0; col < 8; col++) {
            line[col] = clip[line[col] + offset];
        }
        line += stride;
    }
}
libavcodec/i386/dsputil_mmx.c
浏览文件 @
ef9d1d15
...
...
@@ -2754,6 +2754,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif //CONFIG_ENCODERS
c
->
h264_idct_add
=
ff_h264_idct_add_mmx2
;
c
->
h264_idct_dc_add
=
ff_h264_idct_dc_add_mmx2
;
c
->
h264_idct8_dc_add
=
ff_h264_idct8_dc_add_mmx2
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
put_no_rnd_pixels16_x2_mmx2
;
...
...
libavcodec/i386/h264dsp_mmx.c
浏览文件 @
ef9d1d15
...
...
@@ -104,6 +104,87 @@ void ff_h264_idct_add_mmx2(uint8_t *dst, int16_t *block, int stride)
);
}
/*
 * MMX2 version of the DC-only add for a 4x4 block.
 *
 * Reads only block[0], rounds it with (block[0] + 32) >> 6, and adds the
 * result to the four 4-byte rows at dst, dst + stride, dst + 2*stride and
 * dst + 3*stride, using saturating byte arithmetic.
 *
 * dst    - top-left pixel of the 4x4 area, updated in place
 * block  - coefficient block; only element 0 is read
 * stride - byte distance between consecutive rows of dst
 *
 * NOTE(review): the second asm statement consumes mm0/mm1 produced by the
 * first one, and neither statement declares any MMX register as clobbered,
 * so this relies on the compiler not touching MMX registers in between.
 * NOTE(review): no emms is issued in this function; presumably the caller
 * is responsible for it -- confirm.
 */
void
ff_h264_idct_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
)
{
/* Rounded DC term: add half of 64, then shift right by 6. */
int
dc
=
(
block
[
0
]
+
32
)
>>
6
;
/*
 * Split dc into a non-negative part and a non-negative magnitude of the
 * negative part, each replicated into every byte of an MMX register:
 *   mm7 = 0
 *   mm0 = dc broadcast to all four words (pshufw $0)
 *   mm1 = 0 - mm0                 (-dc in every word)
 *   mm0 = max(mm0, 0), mm1 = max(mm1, 0)
 *   packuswb: narrow words to unsigned bytes with saturation
 * After this, at most one of mm0/mm1 is non-zero.
 */
asm
volatile
(
"movd %0, %%mm0
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"pshufw $0, %%mm0, %%mm0
\n\t
"
"pxor %%mm1, %%mm1
\n\t
"
"psubw %%mm0, %%mm1
\n\t
"
"pmaxsw %%mm7, %%mm0
\n\t
"
"pmaxsw %%mm7, %%mm1
\n\t
"
"packuswb %%mm0, %%mm0
\n\t
"
"packuswb %%mm1, %%mm1
\n\t
"
::
"r"
(
dc
)
);
/*
 * Load four rows of 4 bytes, add the positive part (paddusb, saturating
 * at 255), subtract the negative magnitude (psubusb, saturating at 0),
 * and store the rows back. The four operands are read-write ("+m")
 * 32-bit views of the rows at dst + k*stride for k = 0..3.
 */
asm
volatile
(
"movd %0, %%mm2
\n\t
"
"movd %1, %%mm3
\n\t
"
"movd %2, %%mm4
\n\t
"
"movd %3, %%mm5
\n\t
"
"paddusb %%mm0, %%mm2
\n\t
"
"paddusb %%mm0, %%mm3
\n\t
"
"paddusb %%mm0, %%mm4
\n\t
"
"paddusb %%mm0, %%mm5
\n\t
"
"psubusb %%mm1, %%mm2
\n\t
"
"psubusb %%mm1, %%mm3
\n\t
"
"psubusb %%mm1, %%mm4
\n\t
"
"psubusb %%mm1, %%mm5
\n\t
"
"movd %%mm2, %0
\n\t
"
"movd %%mm3, %1
\n\t
"
"movd %%mm4, %2
\n\t
"
"movd %%mm5, %3
\n\t
"
:
"+m"
(
*
(
uint32_t
*
)(
dst
+
0
*
stride
)),
"+m"
(
*
(
uint32_t
*
)(
dst
+
1
*
stride
)),
"+m"
(
*
(
uint32_t
*
)(
dst
+
2
*
stride
)),
"+m"
(
*
(
uint32_t
*
)(
dst
+
3
*
stride
))
);
}
/*
 * MMX2 version of the DC-only add for an 8x8 block.
 *
 * Reads only block[0], rounds it with (block[0] + 32) >> 6, and adds the
 * result to eight 8-byte rows starting at dst, using saturating byte
 * arithmetic. The rows are processed in two passes of four rows each.
 *
 * dst    - top-left pixel of the 8x8 area, updated in place
 * block  - coefficient block; only element 0 is read
 * stride - byte distance between consecutive rows of dst
 *
 * NOTE(review): the asm statement inside the loop consumes mm0/mm1
 * produced by the first asm statement, and no MMX register is declared as
 * clobbered, so this relies on the compiler not touching MMX registers in
 * between.
 * NOTE(review): no emms is issued in this function; presumably the caller
 * is responsible for it -- confirm.
 */
void
ff_h264_idct8_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
)
{
/* Rounded DC term: add half of 64, then shift right by 6. */
int
dc
=
(
block
[
0
]
+
32
)
>>
6
;
int
y
;
/*
 * Split dc into a non-negative part and a non-negative magnitude of the
 * negative part, each replicated into every byte of an MMX register:
 *   mm7 = 0
 *   mm0 = dc broadcast to all four words (pshufw $0)
 *   mm1 = 0 - mm0                 (-dc in every word)
 *   mm0 = max(mm0, 0), mm1 = max(mm1, 0)
 *   packuswb: narrow words to unsigned bytes with saturation
 * After this, at most one of mm0/mm1 is non-zero.
 */
asm
volatile
(
"movd %0, %%mm0
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"pshufw $0, %%mm0, %%mm0
\n\t
"
"pxor %%mm1, %%mm1
\n\t
"
"psubw %%mm0, %%mm1
\n\t
"
"pmaxsw %%mm7, %%mm0
\n\t
"
"pmaxsw %%mm7, %%mm1
\n\t
"
"packuswb %%mm0, %%mm0
\n\t
"
"packuswb %%mm1, %%mm1
\n\t
"
::
"r"
(
dc
)
);
/*
 * Two iterations (y counts 2 -> 1 -> 0); dst advances by four rows after
 * each one, so rows 0..3 are handled first and rows 4..7 second.
 */
for
(
y
=
2
;
y
--
;
dst
+=
4
*
stride
){
/*
 * Load four rows of 8 bytes, add the positive part (paddusb, saturating
 * at 255), subtract the negative magnitude (psubusb, saturating at 0),
 * and store the rows back. The four operands are read-write ("+m")
 * 64-bit views of the rows at dst + k*stride for k = 0..3.
 */
asm
volatile
(
"movq %0, %%mm2
\n\t
"
"movq %1, %%mm3
\n\t
"
"movq %2, %%mm4
\n\t
"
"movq %3, %%mm5
\n\t
"
"paddusb %%mm0, %%mm2
\n\t
"
"paddusb %%mm0, %%mm3
\n\t
"
"paddusb %%mm0, %%mm4
\n\t
"
"paddusb %%mm0, %%mm5
\n\t
"
"psubusb %%mm1, %%mm2
\n\t
"
"psubusb %%mm1, %%mm3
\n\t
"
"psubusb %%mm1, %%mm4
\n\t
"
"psubusb %%mm1, %%mm5
\n\t
"
"movq %%mm2, %0
\n\t
"
"movq %%mm3, %1
\n\t
"
"movq %%mm4, %2
\n\t
"
"movq %%mm5, %3
\n\t
"
:
"+m"
(
*
(
uint64_t
*
)(
dst
+
0
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
dst
+
1
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
dst
+
2
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
dst
+
3
*
stride
))
);
}
}
/***********************************/
/* deblocking */
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录