Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
云duo
FFmpeg
提交
8438b3f0
F
FFmpeg
项目概览
云duo
/
FFmpeg
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
FFmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8438b3f0
编写于
12月 13, 2013
作者:
J
Janne Grunau
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
aarch64: h264 idct NEON assembler optimizations
Ported from ARMv7 NEON.
上级
71617884
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
536 addition
and
0 deletion
+536
-0
libavcodec/aarch64/Makefile
libavcodec/aarch64/Makefile
+2
-0
libavcodec/aarch64/h264dsp_init_aarch64.c
libavcodec/aarch64/h264dsp_init_aarch64.c
+62
-0
libavcodec/aarch64/h264idct_neon.S
libavcodec/aarch64/h264idct_neon.S
+408
-0
libavcodec/aarch64/neon.S
libavcodec/aarch64/neon.S
+61
-0
libavcodec/h264dsp.c
libavcodec/h264dsp.c
+1
-0
libavcodec/h264dsp.h
libavcodec/h264dsp.h
+2
-0
未找到文件。
libavcodec/aarch64/Makefile
浏览文件 @
8438b3f0
# C-level init objects, built whenever the corresponding component is enabled.
OBJS-$(CONFIG_H264CHROMA)               += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP)                  += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_RV40_DECODER)             += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER)              += aarch64/vc1dsp_init_aarch64.o

# NEON assembly objects.
NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP)             += aarch64/h264idct_neon.o
libavcodec/aarch64/h264dsp_init_aarch64.c
0 → 100644
浏览文件 @
8438b3f0
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/h264dsp.h"
/*
 * Prototypes of the NEON routines exported by h264idct_neon.S.
 *
 * NOTE(review): the nnzc parameters are declared with a bound of 6 * 8, but
 * an array bound on a parameter is not enforced by C, and the scan8 table in
 * h264idct_neon.S contains indices up to 7 + 14 * 8 -- confirm the real size
 * against the H264DSPContext declarations.
 */

/* 4x4 block: full transform and DC-only variant. */
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);

/* Loops over several 4x4 blocks, driven by block_offset[] and nnzc[]. */
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
                             int16_t *block, int stride,
                             const uint8_t nnzc[6 * 8]);
void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
                                  int16_t *block, int stride,
                                  const uint8_t nnzc[6 * 8]);
void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
                            int16_t *block, int stride,
                            const uint8_t nnzc[6 * 8]);

/* 8x8 block: full transform, DC-only variant and the four-block loop. */
void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
                             int16_t *block, int stride,
                             const uint8_t nnzc[6 * 8]);
/**
 * Install the AArch64 NEON IDCT routines into an H264DSPContext.
 *
 * Nothing is changed unless the CPU flags report NEON and bit_depth is 8.
 *
 * @param c                 context whose function pointers are overridden
 * @param bit_depth         sample bit depth; only 8 selects the NEON code
 * @param chroma_format_idc h264_idct_add8 is overridden only when this is <= 1
 */
av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
                                     const int chroma_format_idc)
{
    const int cpu_flags = av_get_cpu_flags();

    /* Leave the context untouched unless NEON is present and content is 8-bit. */
    if (!(have_neon(cpu_flags) && bit_depth == 8))
        return;

    /* 4x4 transforms */
    c->h264_idct_add        = ff_h264_idct_add_neon;
    c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
    c->h264_idct_add16      = ff_h264_idct_add16_neon;
    c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
    if (chroma_format_idc <= 1)
        c->h264_idct_add8   = ff_h264_idct_add8_neon;

    /* 8x8 transforms */
    c->h264_idct8_add       = ff_h264_idct8_add_neon;
    c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
    c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
}
libavcodec/aarch64/h264idct_neon.S
0 → 100644
浏览文件 @
8438b3f0
/*
*
Copyright
(
c
)
2008
Mans
Rullgard
<
mans
@
mansr
.
com
>
*
Copyright
(
c
)
2013
Janne
Grunau
<
janne
-
libav
@
jannau
.
net
>
*
*
This
file
is
part
of
Libav
.
*
*
Libav
is
free
software
; you can redistribute it and/or
*
modify
it
under
the
terms
of
the
GNU
Lesser
General
Public
*
License
as
published
by
the
Free
Software
Foundation
; either
*
version
2
.1
of
the
License
,
or
(
at
your
option
)
any
later
version
.
*
*
Libav
is
distributed
in
the
hope
that
it
will
be
useful
,
*
but
WITHOUT
ANY
WARRANTY
; without even the implied warranty of
*
MERCHANTABILITY
or
FITNESS
FOR
A
PARTICULAR
PURPOSE
.
See
the
GNU
*
Lesser
General
Public
License
for
more
details
.
*
*
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
*
License
along
with
Libav
; if not, write to the Free Software
*
Foundation
,
Inc
.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
MA
02110
-
1301
USA
*/
#include "libavutil/aarch64/asm.S"
#include "neon.S"
// void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride)
//   x0 = dst, x1 = block (16 coefficients, cleared on return), w2 = stride
// Transforms one 4x4 coefficient block and adds the rounded result to dst
// with unsigned saturation. x1 is restored before returning.
function ff_h264_idct_add_neon, export=1
        ld1             {v0.4h, v1.4h, v2.4h, v3.4h},  [x1]
        sxtw            x2,  w2                 // stride is an int: widen it
        movi            v30.8h, #0

        // first 1-D pass, interleaved with zeroing the 32-byte block
        add             v4.4h,  v0.4h,  v2.4h
        sshr            v16.4h, v1.4h,  #1
        st1             {v30.8h},   [x1], #16
        sshr            v17.4h, v3.4h,  #1
        st1             {v30.8h},   [x1], #16
        sub             v5.4h,  v0.4h,  v2.4h
        add             v6.4h,  v1.4h,  v17.4h
        sub             v7.4h,  v16.4h, v3.4h
        add             v0.4h,  v4.4h,  v6.4h
        add             v1.4h,  v5.4h,  v7.4h
        sub             v2.4h,  v4.4h,  v6.4h
        sub             v3.4h,  v5.4h,  v7.4h

        transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7

        // second 1-D pass, interleaved with loading the four dst rows
        add             v4.4h,  v0.4h,  v3.4h
        ld1             {v18.s}[0], [x0], x2
        sshr            v16.4h, v2.4h,  #1
        sshr            v17.4h, v1.4h,  #1
        ld1             {v19.s}[1], [x0], x2
        sub             v5.4h,  v0.4h,  v3.4h
        ld1             {v18.s}[1], [x0], x2
        add             v6.4h,  v16.4h, v1.4h
        ins             v4.d[1],  v5.d[0]
        sub             v7.4h,  v2.4h,  v17.4h
        ld1             {v19.s}[0], [x0], x2
        ins             v6.d[1],  v7.d[0]
        sub             x0,  x0,  x2, lsl #2    // rewind dst by four rows
        add             v0.8h,  v4.8h,  v6.8h
        sub             v1.8h,  v4.8h,  v6.8h

        // rounding shift by 6
        srshr           v0.8h,  v0.8h,  #6
        srshr           v1.8h,  v1.8h,  #6

        // add the dst pixels and narrow with unsigned saturation
        uaddw           v0.8h,  v0.8h,  v18.8b
        uaddw           v1.8h,  v1.8h,  v19.8b

        sqxtun          v0.8b,  v0.8h
        sqxtun          v1.8b,  v1.8h

        // store using the same lane order as the loads above
        st1             {v0.s}[0],  [x0], x2
        st1             {v1.s}[1],  [x0], x2
        st1             {v0.s}[1],  [x0], x2
        st1             {v1.s}[0],  [x0], x2

        sub             x1,  x1,  #32           // undo the two post-increments
        ret
endfunc
// void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride)
//   x0 = dst, x1 = block, w2 = stride
// Adds the rounded value of block[0] to four rows of four dst pixels with
// unsigned saturation, and clears block[0].
function ff_h264_idct_dc_add_neon, export=1
        sxtw            x2,  w2                 // stride is an int: widen it
        mov             w3,  #0
        ld1r            {v2.8h},    [x1]        // broadcast block[0]
        strh            w3,  [x1]               // block[0] = 0
        srshr           v2.8h,  v2.8h,  #6      // rounding shift by 6
        ld1             {v0.s}[0],  [x0], x2
        ld1             {v0.s}[1],  [x0], x2
        uaddw           v3.8h,  v2.8h,  v0.8b   // rows 0 and 1
        ld1             {v1.s}[0],  [x0], x2
        ld1             {v1.s}[1],  [x0], x2
        uaddw           v4.8h,  v2.8h,  v1.8b   // rows 2 and 3
        sqxtun          v0.8b,  v3.8h
        sqxtun          v1.8b,  v4.8h
        sub             x0,  x0,  x2, lsl #2    // rewind dst by four rows
        st1             {v0.s}[0],  [x0], x2
        st1             {v0.s}[1],  [x0], x2
        st1             {v1.s}[0],  [x0], x2
        st1             {v1.s}[1],  [x0], x2
        ret
endfunc
// void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
//                              int16_t *block, int stride,
//                              const uint8_t nnzc[])
//   x0 = dst, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc
// Walks 16 consecutive 32-byte blocks. Block i is skipped when
// nnzc[scan8[i]] is 0. Otherwise the DC-only routine is called when that
// count is 1 and block[0] is non-zero, and the full routine in every other
// case.
function ff_h264_idct_add16_neon, export=1
        mov             x12, x30                // keep the return address
        mov             x6,  x0                 // dest
        mov             x5,  x1                 // block_offset
        mov             x1,  x2                 // block
        mov             w9,  w3                 // stride
        movrel          x7,  scan8
        mov             x10, #16                // blocks left
        movrel          x13, ff_h264_idct_dc_add_neon
        movrel          x14, ff_h264_idct_add_neon
1:      mov             w2,  w9                 // stride argument for the callee
        ldrb            w3,  [x7], #1           // scan8[i]
        ldrsw           x0,  [x5], #4           // block_offset[i]
        ldrb            w3,  [x4,  w3,  uxtw]   // nnzc[scan8[i]]
        subs            w3,  w3,  #1
        b.lt            2f                      // count was 0: nothing to add
        ldrsh           w3,  [x1]               // block[0]
        add             x0,  x0,  x6            // dst + block_offset[i]
        ccmp            w3,  #0,  #4,  eq       // count == 1 ? test block[0] : force Z
        csel            x15, x13, x14, ne       // ne -> DC only, eq -> full
        blr             x15
2:      subs            x10, x10, #1
        add             x1,  x1,  #32           // next block
        b.ne            1b
        ret             x12
endfunc
// void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
//                                   int16_t *block, int stride,
//                                   const uint8_t nnzc[])
//   x0 = dst, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc
// Walks 16 consecutive 32-byte blocks. When nnzc[scan8[i]] is non-zero the
// full routine is called. When it is zero the DC-only routine is called if
// block[0] is non-zero, and the block is skipped otherwise.
function ff_h264_idct_add16intra_neon, export=1
        mov             x12, x30                // keep the return address
        mov             x6,  x0                 // dest
        mov             x5,  x1                 // block_offset
        mov             x1,  x2                 // block
        mov             w9,  w3                 // stride
        movrel          x7,  scan8
        mov             x10, #16                // blocks left
        movrel          x13, ff_h264_idct_dc_add_neon
        movrel          x14, ff_h264_idct_add_neon
1:      mov             w2,  w9                 // stride argument for the callee
        ldrb            w3,  [x7], #1           // scan8[i]
        ldrsw           x0,  [x5], #4           // block_offset[i]
        ldrb            w3,  [x4,  w3,  uxtw]   // nnzc[scan8[i]]
        add             x0,  x0,  x6            // dst + block_offset[i]
        cmp             w3,  #0
        ldrsh           w3,  [x1]               // block[0]
        csel            x15, x13, x14, eq       // count == 0 -> DC only, else full
        ccmp            w3,  #0,  #0,  eq       // count == 0 ? test block[0] : force ne
        b.eq            2f                      // count == 0 and block[0] == 0: skip
        blr             x15
2:      subs            x10, x10, #1
        add             x1,  x1,  #32           // next block
        b.ne            1b
        ret             x12
endfunc
// void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
//                             int16_t *block, int stride,
//                             const uint8_t nnzc[])
//   x0 = dest, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc
// Processes indices 16..19 against dest[0] and then 32..35 against dest[1].
// Per block the selection is the same as in the add16intra loop: full
// routine when nnzc[scan8[i]] is non-zero, DC-only routine when it is zero
// but block[0] is not, nothing otherwise.
function ff_h264_idct_add8_neon, export=1
        sub             sp,  sp,  #0x40
        stp             x19, x20, [sp]          // callee-saved registers used below
        mov             x12, x30                // keep the return address
        ldp             x6,  x15, [x0]          // dest[0], dest[1]
        add             x5,  x1,  #16*4         // &block_offset[16]
        add             x9,  x2,  #16*32        // block + 16 blocks
        mov             w19, w3                 // stride
        movrel          x13, ff_h264_idct_dc_add_neon
        movrel          x14, ff_h264_idct_add_neon
        movrel          x7,  scan8+16
        mov             x10, #0                 // i, relative to index 16
        mov             x11, #16
1:      mov             w2,  w19                // stride argument for the callee
        ldrb            w3,  [x7, x10]          // scan8[i]
        ldrsw           x0,  [x5, x10, lsl #2]  // block_offset[i]
        ldrb            w3,  [x4, w3,  uxtw]    // nnzc[ scan8[i] ]
        add             x0,  x0,  x6            // block_offset[i] + dst[j-1]
        add             x1,  x9,  x10, lsl #5   // block + i * 16
        cmp             w3,  #0
        ldrsh           w3,  [x1]               // block[i*16]
        csel            x20, x13, x14, eq       // count == 0 -> DC only, else full
        ccmp            w3,  #0,  #0,  eq
        b.eq            2f                      // count == 0 and block[0] == 0: skip
        blr             x20
2:      add             x10, x10, #1
        cmp             x10, #4
        csel            x10, x11, x10, eq       // after four blocks: mov x10, #16
        csel            x6,  x15, x6,  eq       // ... and switch to dest[1]
        cmp             x10, #20
        b.lt            1b
        ldp             x19, x20, [sp]
        add             sp,  sp,  #0x40
        ret             x12
endfunc
// One 1-D pass of the 8x8 transform over eight vectors of eight halfwords.
//
// pass == 0: v24-v29 hold the first six input vectors; the last two are
//            loaded from [x1] into v30/v31 and those 32 bytes are
//            overwritten with v19, which the caller must have zeroed.
//            x1 advances by 32. Results end up in v24-v29, v18, v31.
// pass != 0: inputs are v24-v29, v18, v31; results end up in v24-v31.
//
// va/vb are aliases for the two registers whose roles differ by pass.
.macro  idct8x8_cols    pass
  .if \pass == 0
        va      .req    v18
        vb      .req    v30
        sshr            v18.8h, v26.8h, #1
        add             v16.8h, v24.8h, v28.8h
        ld1             {v30.8h, v31.8h}, [x1]
        st1             {v19.8h}, [x1], #16
        st1             {v19.8h}, [x1], #16
        sub             v17.8h, v24.8h, v28.8h
        sshr            v19.8h, v30.8h, #1
        sub             v18.8h, v18.8h, v30.8h
        add             v19.8h, v19.8h, v26.8h
  .else
        va      .req    v30
        vb      .req    v18
        sshr            v30.8h, v26.8h, #1
        sshr            v19.8h, v18.8h, #1
        add             v16.8h, v24.8h, v28.8h
        sub             v17.8h, v24.8h, v28.8h
        sub             v30.8h, v30.8h, v18.8h
        add             v19.8h, v19.8h, v26.8h
  .endif
        // part shared by both passes
        add             v26.8h, v17.8h, va.8h
        sub             v28.8h, v17.8h, va.8h
        add             v24.8h, v16.8h, v19.8h
        sub             vb.8h,  v16.8h, v19.8h
        sub             v16.8h, v29.8h, v27.8h
        add             v17.8h, v31.8h, v25.8h
        sub             va.8h,  v31.8h, v25.8h
        add             v19.8h, v29.8h, v27.8h
        sub             v16.8h, v16.8h, v31.8h
        sub             v17.8h, v17.8h, v27.8h
        add             va.8h,  va.8h,  v29.8h
        add             v19.8h, v19.8h, v25.8h
        sshr            v25.8h, v25.8h, #1
        sshr            v27.8h, v27.8h, #1
        sshr            v29.8h, v29.8h, #1
        sshr            v31.8h, v31.8h, #1
        sub             v16.8h, v16.8h, v31.8h
        sub             v17.8h, v17.8h, v27.8h
        add             va.8h,  va.8h,  v29.8h
        add             v19.8h, v19.8h, v25.8h
        sshr            v25.8h, v16.8h, #2
        sshr            v27.8h, v17.8h, #2
        sshr            v29.8h, va.8h,  #2
        sshr            v31.8h, v19.8h, #2
        sub             v19.8h, v19.8h, v25.8h
        sub             va.8h,  v27.8h, va.8h
        add             v17.8h, v17.8h, v29.8h
        add             v16.8h, v16.8h, v31.8h
        // final butterflies; only the register assignment differs by pass
  .if \pass == 0
        sub             v31.8h, v24.8h, v19.8h
        add             v24.8h, v24.8h, v19.8h
        add             v25.8h, v26.8h, v18.8h
        sub             v18.8h, v26.8h, v18.8h
        add             v26.8h, v28.8h, v17.8h
        add             v27.8h, v30.8h, v16.8h
        sub             v29.8h, v28.8h, v17.8h
        sub             v28.8h, v30.8h, v16.8h
  .else
        sub             v31.8h, v24.8h, v19.8h
        add             v24.8h, v24.8h, v19.8h
        add             v25.8h, v26.8h, v30.8h
        sub             v30.8h, v26.8h, v30.8h
        add             v26.8h, v28.8h, v17.8h
        sub             v29.8h, v28.8h, v17.8h
        add             v27.8h, v18.8h, v16.8h
        sub             v28.8h, v18.8h, v16.8h
  .endif
        .unreq          va
        .unreq          vb
.endm
// void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride)
//   x0 = dst, x1 = block (64 coefficients, cleared on return), w2 = stride
// Transforms one 8x8 coefficient block and adds the rounded result to dst
// with unsigned saturation. x1 is restored before returning.
function ff_h264_idct8_add_neon, export=1
        movi            v19.8h, #0
        // stride is a 32-bit int but x2 is used below as a 64-bit post-index
        // register. Sign-extend it, as the other entry points in this file do.
        // Without this a negative stride, or a caller that leaves the upper
        // half of x2 undefined, addresses the wrong rows.
        sxtw            x2,  w2
        // load the first six rows, zeroing the block as we go
        ld1             {v24.8h, v25.8h}, [x1]
        st1             {v19.8h},   [x1], #16
        st1             {v19.8h},   [x1], #16
        ld1             {v26.8h, v27.8h}, [x1]
        st1             {v19.8h},   [x1], #16
        st1             {v19.8h},   [x1], #16
        ld1             {v28.8h, v29.8h}, [x1]
        st1             {v19.8h},   [x1], #16
        st1             {v19.8h},   [x1], #16

        // pass 0 also loads and clears the last two rows
        idct8x8_cols    0
        transpose_8x8H  v24, v25, v26, v27, v28, v29, v18, v31, v6, v7
        idct8x8_cols    1

        // rounding shift by 6, interleaved with loading the eight dst rows;
        // x3 keeps the original dst for the stores
        mov             x3,  x0
        srshr           v24.8h, v24.8h, #6
        ld1             {v0.8b},    [x0], x2
        srshr           v25.8h, v25.8h, #6
        ld1             {v1.8b},    [x0], x2
        srshr           v26.8h, v26.8h, #6
        ld1             {v2.8b},    [x0], x2
        srshr           v27.8h, v27.8h, #6
        ld1             {v3.8b},    [x0], x2
        srshr           v28.8h, v28.8h, #6
        ld1             {v4.8b},    [x0], x2
        srshr           v29.8h, v29.8h, #6
        ld1             {v5.8b},    [x0], x2
        srshr           v30.8h, v30.8h, #6
        ld1             {v6.8b},    [x0], x2
        srshr           v31.8h, v31.8h, #6
        ld1             {v7.8b},    [x0], x2
        // add the dst pixels, narrow with unsigned saturation, store
        uaddw           v24.8h, v24.8h, v0.8b
        uaddw           v25.8h, v25.8h, v1.8b
        uaddw           v26.8h, v26.8h, v2.8b
        sqxtun          v0.8b,  v24.8h
        uaddw           v27.8h, v27.8h, v3.8b
        sqxtun          v1.8b,  v25.8h
        uaddw           v28.8h, v28.8h, v4.8b
        sqxtun          v2.8b,  v26.8h
        st1             {v0.8b},    [x3], x2
        uaddw           v29.8h, v29.8h, v5.8b
        sqxtun          v3.8b,  v27.8h
        st1             {v1.8b},    [x3], x2
        uaddw           v30.8h, v30.8h, v6.8b
        sqxtun          v4.8b,  v28.8h
        st1             {v2.8b},    [x3], x2
        uaddw           v31.8h, v31.8h, v7.8b
        sqxtun          v5.8b,  v29.8h
        st1             {v3.8b},    [x3], x2
        sqxtun          v6.8b,  v30.8h
        sqxtun          v7.8b,  v31.8h
        st1             {v4.8b},    [x3], x2
        st1             {v5.8b},    [x3], x2
        st1             {v6.8b},    [x3], x2
        st1             {v7.8b},    [x3], x2

        sub             x1,  x1,  #128          // undo the eight post-increments
        ret
endfunc
// void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride)
//   x0 = dst, x1 = block, w2 = stride
// Adds the rounded value of block[0] to eight rows of eight dst pixels with
// unsigned saturation, and clears block[0].
function ff_h264_idct8_dc_add_neon, export=1
        mov             w3,  #0
        sxtw            x2,  w2                 // stride is an int: widen it
        ld1r            {v31.8h},   [x1]        // broadcast block[0]
        strh            w3,  [x1]               // block[0] = 0
        ld1             {v0.8b},    [x0], x2
        srshr           v31.8h, v31.8h, #6      // rounding shift by 6
        ld1             {v1.8b},    [x0], x2
        ld1             {v2.8b},    [x0], x2
        uaddw           v24.8h, v31.8h, v0.8b
        ld1             {v3.8b},    [x0], x2
        uaddw           v25.8h, v31.8h, v1.8b
        ld1             {v4.8b},    [x0], x2
        uaddw           v26.8h, v31.8h, v2.8b
        ld1             {v5.8b},    [x0], x2
        uaddw           v27.8h, v31.8h, v3.8b
        ld1             {v6.8b},    [x0], x2
        uaddw           v28.8h, v31.8h, v4.8b
        ld1             {v7.8b},    [x0], x2
        uaddw           v29.8h, v31.8h, v5.8b
        uaddw           v30.8h, v31.8h, v6.8b
        uaddw           v31.8h, v31.8h, v7.8b   // last use of the DC value
        sqxtun          v0.8b,  v24.8h
        sqxtun          v1.8b,  v25.8h
        sqxtun          v2.8b,  v26.8h
        sqxtun          v3.8b,  v27.8h
        sub             x0,  x0,  x2, lsl #3    // rewind dst by eight rows
        st1             {v0.8b},    [x0], x2
        sqxtun          v4.8b,  v28.8h
        st1             {v1.8b},    [x0], x2
        sqxtun          v5.8b,  v29.8h
        st1             {v2.8b},    [x0], x2
        sqxtun          v6.8b,  v30.8h
        st1             {v3.8b},    [x0], x2
        sqxtun          v7.8b,  v31.8h
        st1             {v4.8b},    [x0], x2
        st1             {v5.8b},    [x0], x2
        st1             {v6.8b},    [x0], x2
        st1             {v7.8b},    [x0], x2
        ret
endfunc
// void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
//                              int16_t *block, int stride,
//                              const uint8_t nnzc[])
//   x0 = dst, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc
// Visits every fourth index (0, 4, 8, 12), stepping 128 bytes through block.
// A block is skipped when nnzc[scan8[i]] is 0. Otherwise the DC-only 8x8
// routine is called when that count is 1 and block[0] is non-zero, and the
// full 8x8 routine in every other case.
function ff_h264_idct8_add4_neon, export=1
        mov             x12, x30                // keep the return address
        mov             x6,  x0                 // dst
        mov             x5,  x1                 // block_offset
        mov             x1,  x2                 // block
        mov             w2,  w3                 // stride (32-bit move), set once for all calls
        movrel          x7,  scan8
        mov             w10, #16                // indices left, stepping by 4
        movrel          x13, ff_h264_idct8_dc_add_neon
        movrel          x14, ff_h264_idct8_add_neon
1:      ldrb            w9,  [x7], #4           // scan8[i]
        ldrsw           x0,  [x5], #16          // block_offset[i]
        ldrb            w9,  [x4, w9, uxtw]     // nnzc[scan8[i]]
        subs            w9,  w9,  #1
        b.lt            2f                      // count was 0: nothing to add
        ldrsh           w11, [x1]               // block[0]
        add             x0,  x6,  x0            // dst + block_offset[i]
        ccmp            w11, #0,  #4,  eq       // count == 1 ? test block[0] : force Z
        csel            x15, x13, x14, ne       // ne -> DC only, eq -> full
        blr             x15
2:      subs            w10, w10, #4
        add             x1,  x1,  #128          // next 8x8 block
        b.ne            1b
        ret             x12
endfunc
// Byte table used by the loops in this file to translate a block index
// into an index into nnzc[]. Every entry has the form column + row*8.
const   scan8
        .byte           4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
        .byte           6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
        .byte           4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
        .byte           6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
        // entries 16..31
        .byte           4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
        .byte           6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
        .byte           4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
        .byte           6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
        // entries 32..47
        .byte           4+11*8, 5+11*8, 4+12*8, 5+12*8
        .byte           6+11*8, 7+11*8, 6+12*8, 7+12*8
        .byte           4+13*8, 5+13*8, 4+14*8, 5+14*8
        .byte           6+13*8, 7+13*8, 6+14*8, 7+14*8
endconst
libavcodec/aarch64/neon.S
0 → 100644
浏览文件 @
8438b3f0
/*
*
This
file
is
part
of
Libav
.
*
*
Libav
is
free
software
; you can redistribute it and/or
*
modify
it
under
the
terms
of
the
GNU
Lesser
General
Public
*
License
as
published
by
the
Free
Software
Foundation
; either
*
version
2
.1
of
the
License
,
or
(
at
your
option
)
any
later
version
.
*
*
Libav
is
distributed
in
the
hope
that
it
will
be
useful
,
*
but
WITHOUT
ANY
WARRANTY
; without even the implied warranty of
*
MERCHANTABILITY
or
FITNESS
FOR
A
PARTICULAR
PURPOSE
.
See
the
GNU
*
Lesser
General
Public
License
for
more
details
.
*
*
You
should
have
received
a
copy
of
the
GNU
Lesser
General
Public
*
License
along
with
Libav
; if not, write to the Free Software
*
Foundation
,
Inc
.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
,
MA
02110
-
1301
USA
*/
// Transposes a 4x4 matrix of halfwords held in the low halves of r0-r3,
// using r4-r7 as scratch.
// Note the operand order: the second pair is combined as (r3, r2), and the
// results are written to r0, r1, r3, r2 in that order.
.macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
        // interleave halfwords of each pair
        trn1            \r4\().4h,  \r0\().4h,  \r1\().4h
        trn2            \r5\().4h,  \r0\().4h,  \r1\().4h
        trn1            \r7\().4h,  \r3\().4h,  \r2\().4h
        trn2            \r6\().4h,  \r3\().4h,  \r2\().4h
        // interleave words of the intermediate results
        trn1            \r0\().2s,  \r4\().2s,  \r7\().2s
        trn2            \r3\().2s,  \r4\().2s,  \r7\().2s
        trn1            \r1\().2s,  \r5\().2s,  \r6\().2s
        trn2            \r2\().2s,  \r5\().2s,  \r6\().2s
.endm
// Transposes an 8x8 matrix of halfwords held in r0-r7 in place, using r8 and
// r9 as scratch. Three rounds of trn1/trn2 are applied, at halfword, word
// and doubleword granularity.
.macro  transpose_8x8H  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
        // round 1: halfword interleave
        trn1            \r8\().8h,  \r0\().8h,  \r1\().8h
        trn2            \r9\().8h,  \r0\().8h,  \r1\().8h
        trn1            \r1\().8h,  \r2\().8h,  \r3\().8h
        trn2            \r3\().8h,  \r2\().8h,  \r3\().8h
        trn1            \r0\().8h,  \r4\().8h,  \r5\().8h
        trn2            \r5\().8h,  \r4\().8h,  \r5\().8h
        trn1            \r2\().8h,  \r6\().8h,  \r7\().8h
        trn2            \r7\().8h,  \r6\().8h,  \r7\().8h

        // round 2: word interleave
        trn1            \r4\().4s,  \r0\().4s,  \r2\().4s
        trn2            \r2\().4s,  \r0\().4s,  \r2\().4s
        trn1            \r6\().4s,  \r5\().4s,  \r7\().4s
        trn2            \r7\().4s,  \r5\().4s,  \r7\().4s
        trn1            \r5\().4s,  \r9\().4s,  \r3\().4s
        trn2            \r9\().4s,  \r9\().4s,  \r3\().4s
        trn1            \r3\().4s,  \r8\().4s,  \r1\().4s
        trn2            \r8\().4s,  \r8\().4s,  \r1\().4s

        // round 3: doubleword interleave
        trn1            \r0\().2d,  \r3\().2d,  \r4\().2d
        trn2            \r4\().2d,  \r3\().2d,  \r4\().2d

        trn1            \r1\().2d,  \r5\().2d,  \r6\().2d
        trn2            \r5\().2d,  \r5\().2d,  \r6\().2d

        trn2            \r6\().2d,  \r8\().2d,  \r2\().2d
        trn1            \r2\().2d,  \r8\().2d,  \r2\().2d

        trn1            \r3\().2d,  \r9\().2d,  \r7\().2d
        trn2            \r7\().2d,  \r9\().2d,  \r7\().2d
.endm
libavcodec/h264dsp.c
浏览文件 @
8438b3f0
...
...
@@ -163,6 +163,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
}
c
->
h264_find_start_code_candidate
=
h264_find_start_code_candidate_c
;
if
(
ARCH_AARCH64
)
ff_h264dsp_init_aarch64
(
c
,
bit_depth
,
chroma_format_idc
);
if
(
ARCH_ARM
)
ff_h264dsp_init_arm
(
c
,
bit_depth
,
chroma_format_idc
);
if
(
ARCH_PPC
)
ff_h264dsp_init_ppc
(
c
,
bit_depth
,
chroma_format_idc
);
if
(
ARCH_X86
)
ff_h264dsp_init_x86
(
c
,
bit_depth
,
chroma_format_idc
);
...
...
libavcodec/h264dsp.h
浏览文件 @
8438b3f0
...
...
@@ -118,6 +118,8 @@ typedef struct H264DSPContext {
void
ff_h264dsp_init
(
H264DSPContext
*
c
,
const
int
bit_depth
,
const
int
chroma_format_idc
);
void
ff_h264dsp_init_aarch64
(
H264DSPContext
*
c
,
const
int
bit_depth
,
const
int
chroma_format_idc
);
void
ff_h264dsp_init_arm
(
H264DSPContext
*
c
,
const
int
bit_depth
,
const
int
chroma_format_idc
);
void
ff_h264dsp_init_ppc
(
H264DSPContext
*
c
,
const
int
bit_depth
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录