Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
55aa03b9
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
55aa03b9
编写于
1月 19, 2013
作者:
R
Ronald S. Bultje
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
floatdsp: move vector_fmul_add from dsputil to avfloatdsp.
上级
0881cbf3
变更
18
隐藏空白更改
内联
并排
Showing
18 changed files
with
131 additions
and
108 deletions
+131
-108
libavcodec/aacsbr.c
libavcodec/aacsbr.c
+5
-5
libavcodec/arm/dsputil_init_neon.c
libavcodec/arm/dsputil_init_neon.c
+0
-3
libavcodec/arm/dsputil_neon.S
libavcodec/arm/dsputil_neon.S
+0
-27
libavcodec/dsputil.c
libavcodec/dsputil.c
+0
-7
libavcodec/dsputil.h
libavcodec/dsputil.h
+0
-2
libavcodec/ppc/float_altivec.c
libavcodec/ppc/float_altivec.c
+0
-25
libavcodec/wmadec.c
libavcodec/wmadec.c
+4
-4
libavcodec/x86/dsputil.asm
libavcodec/x86/dsputil.asm
+0
-28
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-7
libavutil/arm/float_dsp_init_neon.c
libavutil/arm/float_dsp_init_neon.c
+4
-0
libavutil/arm/float_dsp_neon.S
libavutil/arm/float_dsp_neon.S
+27
-0
libavutil/float_dsp.c
libavutil/float_dsp.c
+9
-0
libavutil/float_dsp.h
libavutil/float_dsp.h
+18
-0
libavutil/ppc/float_dsp_altivec.c
libavutil/ppc/float_dsp_altivec.c
+24
-0
libavutil/ppc/float_dsp_altivec.h
libavutil/ppc/float_dsp_altivec.h
+4
-0
libavutil/ppc/float_dsp_init.c
libavutil/ppc/float_dsp_init.c
+1
-0
libavutil/x86/float_dsp.asm
libavutil/x86/float_dsp.asm
+28
-0
libavutil/x86/float_dsp_init.c
libavutil/x86/float_dsp_init.c
+7
-0
未找到文件。
libavcodec/aacsbr.c
浏览文件 @
55aa03b9
...
@@ -1172,8 +1172,8 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct,
...
@@ -1172,8 +1172,8 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct,
* Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
* Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
* (14496-3 sp04 p206)
* (14496-3 sp04 p206)
*/
*/
static
void
sbr_qmf_synthesis
(
DSPContext
*
dsp
,
FFTContext
*
mdct
,
static
void
sbr_qmf_synthesis
(
FFTContext
*
mdct
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
fdsp
,
SBRDSPContext
*
sbrdsp
,
AVFloatDSPContext
*
dsp
,
float
*
out
,
float
X
[
2
][
38
][
64
],
float
*
out
,
float
X
[
2
][
38
][
64
],
float
mdct_buf
[
2
][
64
],
float
mdct_buf
[
2
][
64
],
float
*
v0
,
int
*
v_off
,
const
unsigned
int
div
)
float
*
v0
,
int
*
v_off
,
const
unsigned
int
div
)
...
@@ -1204,7 +1204,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
...
@@ -1204,7 +1204,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
mdct
->
imdct_half
(
mdct
,
mdct_buf
[
1
],
X
[
1
][
i
]);
mdct
->
imdct_half
(
mdct
,
mdct_buf
[
1
],
X
[
1
][
i
]);
sbrdsp
->
qmf_deint_bfly
(
v
,
mdct_buf
[
1
],
mdct_buf
[
0
]);
sbrdsp
->
qmf_deint_bfly
(
v
,
mdct_buf
[
1
],
mdct_buf
[
0
]);
}
}
fdsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul
(
out
,
v
,
sbr_qmf_window
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
192
>>
div
),
sbr_qmf_window
+
(
64
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
192
>>
div
),
sbr_qmf_window
+
(
64
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
256
>>
div
),
sbr_qmf_window
+
(
128
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
256
>>
div
),
sbr_qmf_window
+
(
128
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
448
>>
div
),
sbr_qmf_window
+
(
192
>>
div
),
out
,
64
>>
div
);
dsp
->
vector_fmul_add
(
out
,
v
+
(
448
>>
div
),
sbr_qmf_window
+
(
192
>>
div
),
out
,
64
>>
div
);
...
@@ -1702,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
...
@@ -1702,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
nch
=
2
;
nch
=
2
;
}
}
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
L
,
sbr
->
X
[
0
],
sbr
->
qmf_filter_scratch
,
L
,
sbr
->
X
[
0
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
0
].
synthesis_filterbank_samples
,
sbr
->
data
[
0
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
0
].
synthesis_filterbank_samples_offset
,
&
sbr
->
data
[
0
].
synthesis_filterbank_samples_offset
,
downsampled
);
downsampled
);
if
(
nch
==
2
)
if
(
nch
==
2
)
sbr_qmf_synthesis
(
&
ac
->
dsp
,
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
sbr_qmf_synthesis
(
&
sbr
->
mdct
,
&
sbr
->
dsp
,
&
ac
->
fdsp
,
R
,
sbr
->
X
[
1
],
sbr
->
qmf_filter_scratch
,
R
,
sbr
->
X
[
1
],
sbr
->
qmf_filter_scratch
,
sbr
->
data
[
1
].
synthesis_filterbank_samples
,
sbr
->
data
[
1
].
synthesis_filterbank_samples
,
&
sbr
->
data
[
1
].
synthesis_filterbank_samples_offset
,
&
sbr
->
data
[
1
].
synthesis_filterbank_samples_offset
,
...
...
libavcodec/arm/dsputil_init_neon.c
浏览文件 @
55aa03b9
...
@@ -146,8 +146,6 @@ void ff_butterflies_float_neon(float *v1, float *v2, int len);
...
@@ -146,8 +146,6 @@ void ff_butterflies_float_neon(float *v1, float *v2, int len);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
float
ff_scalarproduct_float_neon
(
const
float
*
v1
,
const
float
*
v2
,
int
len
);
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clipf_neon
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
void
ff_vector_clipf_neon
(
float
*
dst
,
const
float
*
src
,
float
min
,
float
max
,
int
len
);
int
len
);
...
@@ -301,7 +299,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
...
@@ -301,7 +299,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
butterflies_float
=
ff_butterflies_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_neon
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_neon
;
c
->
vector_clipf
=
ff_vector_clipf_neon
;
c
->
vector_clipf
=
ff_vector_clipf_neon
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_neon
;
c
->
vector_clip_int32
=
ff_vector_clip_int32_neon
;
...
...
libavcodec/arm/dsputil_neon.S
浏览文件 @
55aa03b9
...
@@ -580,33 +580,6 @@ function ff_vector_fmul_reverse_neon, export=1
...
@@ -580,33 +580,6 @@ function ff_vector_fmul_reverse_neon, export=1
bx
lr
bx
lr
endfunc
endfunc
function
ff_vector_fmul_add_neon
,
export
=
1
ldr
r12
,
[
sp
]
vld1.32
{
q0
-
q1
},
[
r1
,:
128
]!
vld1.32
{
q8
-
q9
},
[
r2
,:
128
]!
vld1.32
{
q2
-
q3
},
[
r3
,:
128
]!
vmul.f32
q10
,
q0
,
q8
vmul.f32
q11
,
q1
,
q9
1
:
vadd.f32
q12
,
q2
,
q10
vadd.f32
q13
,
q3
,
q11
pld
[
r1
,
#
16
]
pld
[
r2
,
#
16
]
pld
[
r3
,
#
16
]
subs
r12
,
r12
,
#
8
beq
2
f
vld1.32
{
q0
},
[
r1
,:
128
]!
vld1.32
{
q8
},
[
r2
,:
128
]!
vmul.f32
q10
,
q0
,
q8
vld1.32
{
q1
},
[
r1
,:
128
]!
vld1.32
{
q9
},
[
r2
,:
128
]!
vmul.f32
q11
,
q1
,
q9
vld1.32
{
q2
-
q3
},
[
r3
,:
128
]!
vst1.32
{
q12
-
q13
},[
r0
,:
128
]!
b
1
b
2
:
vst1.32
{
q12
-
q13
},[
r0
,:
128
]!
bx
lr
endfunc
function
ff_vector_clipf_neon
,
export
=
1
function
ff_vector_clipf_neon
,
export
=
1
VFP
vdup.32
q1
,
d0
[
1
]
VFP
vdup.32
q1
,
d0
[
1
]
VFP
vdup.32
q0
,
d0
[
0
]
VFP
vdup.32
q0
,
d0
[
0
]
...
...
libavcodec/dsputil.c
浏览文件 @
55aa03b9
...
@@ -2360,12 +2360,6 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
...
@@ -2360,12 +2360,6 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
dst
[
i
]
=
src0
[
i
]
*
src1
[
-
i
];
dst
[
i
]
=
src0
[
i
]
*
src1
[
-
i
];
}
}
static
void
vector_fmul_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
static
void
butterflies_float_c
(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
)
int
len
)
{
{
...
@@ -2714,7 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -2714,7 +2708,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
add_8x8basis
=
add_8x8basis_c
;
c
->
add_8x8basis
=
add_8x8basis_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add
=
vector_fmul_add_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
...
...
libavcodec/dsputil.h
浏览文件 @
55aa03b9
...
@@ -342,8 +342,6 @@ typedef struct DSPContext {
...
@@ -342,8 +342,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
/* assume len is a multiple of 16, and arrays are 32-byte aligned */
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
void
(
*
vector_clipf
)(
float
*
dst
/* align 16 */
,
const
float
*
src
/* align 16 */
,
float
min
,
float
max
,
int
len
/* align 16 */
);
/**
/**
...
...
libavcodec/ppc/float_altivec.c
浏览文件 @
55aa03b9
...
@@ -51,32 +51,7 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
...
@@ -51,32 +51,7 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
}
}
}
}
static
void
vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_float_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
{
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_altivec
;
c
->
vector_fmul_add
=
vector_fmul_add_altivec
;
}
}
libavcodec/wmadec.c
浏览文件 @
55aa03b9
...
@@ -379,16 +379,16 @@ static void wma_window(WMACodecContext *s, float *out)
...
@@ -379,16 +379,16 @@ static void wma_window(WMACodecContext *s, float *out)
block_len
=
s
->
block_len
;
block_len
=
s
->
block_len
;
bsize
=
s
->
frame_len_bits
-
s
->
block_len_bits
;
bsize
=
s
->
frame_len_bits
-
s
->
block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
s
->
fdsp
.
vector_fmul_add
(
out
,
in
,
s
->
windows
[
bsize
],
out
,
block_len
);
out
,
block_len
);
}
else
{
}
else
{
block_len
=
1
<<
s
->
prev_block_len_bits
;
block_len
=
1
<<
s
->
prev_block_len_bits
;
n
=
(
s
->
block_len
-
block_len
)
/
2
;
n
=
(
s
->
block_len
-
block_len
)
/
2
;
bsize
=
s
->
frame_len_bits
-
s
->
prev_block_len_bits
;
bsize
=
s
->
frame_len_bits
-
s
->
prev_block_len_bits
;
s
->
dsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
s
->
fdsp
.
vector_fmul_add
(
out
+
n
,
in
+
n
,
s
->
windows
[
bsize
],
out
+
n
,
block_len
);
out
+
n
,
block_len
);
memcpy
(
out
+
n
+
block_len
,
in
+
n
+
block_len
,
n
*
sizeof
(
float
));
memcpy
(
out
+
n
+
block_len
,
in
+
n
+
block_len
,
n
*
sizeof
(
float
));
}
}
...
...
libavcodec/x86/dsputil.asm
浏览文件 @
55aa03b9
...
@@ -604,34 +604,6 @@ VECTOR_FMUL_REVERSE
...
@@ -604,34 +604,6 @@ VECTOR_FMUL_REVERSE
INIT_YMM
avx
INIT_YMM
avx
VECTOR_FMUL_REVERSE
VECTOR_FMUL_REVERSE
;-----------------------------------------------------------------------------
; vector_fmul_add(float *dst, const float *src0, const float *src1,
; const float *src2, int len)
;-----------------------------------------------------------------------------
%macro VECTOR_FMUL_ADD 0
cglobal
vector_fmul_add
,
5
,
5
,
2
,
dst
,
src0
,
src1
,
src2
,
len
lea
lenq
,
[
lend
*
4
-
2
*
mmsize
]
ALIGN
16
.loop:
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src0q
+
lenq
+
mmsize
]
mulps
m0
,
m0
,
[
src1q
+
lenq
]
mulps
m1
,
m1
,
[
src1q
+
lenq
+
mmsize
]
addps
m0
,
m0
,
[
src2q
+
lenq
]
addps
m1
,
m1
,
[
src2q
+
lenq
+
mmsize
]
mova
[
dstq
+
lenq
],
m0
mova
[
dstq
+
lenq
+
mmsize
],
m1
sub
lenq
,
2
*
mmsize
jge
.loop
REP_RET
%endmacro
INIT_XMM
sse
VECTOR_FMUL_ADD
INIT_YMM
avx
VECTOR_FMUL_ADD
; %1 = aligned/unaligned
; %1 = aligned/unaligned
%macro BSWAP_LOOPS 1
%macro BSWAP_LOOPS 1
mov
r3
,
r2
mov
r3
,
r2
...
...
libavcodec/x86/dsputil_mmx.c
浏览文件 @
55aa03b9
...
@@ -1853,11 +1853,6 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
...
@@ -1853,11 +1853,6 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
void
ff_vector_fmul_reverse_avx
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_reverse_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
const
float
*
src1
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_clip_int32_mmx
(
int32_t
*
dst
,
const
int32_t
*
src
,
void
ff_vector_clip_int32_mmx
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
void
ff_vector_clip_int32_sse2
(
int32_t
*
dst
,
const
int32_t
*
src
,
...
@@ -2141,7 +2136,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
...
@@ -2141,7 +2136,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#if HAVE_YASM
#if HAVE_YASM
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_sse
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_sse
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
#endif
/* HAVE_YASM */
#endif
/* HAVE_YASM */
...
@@ -2295,7 +2289,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
...
@@ -2295,7 +2289,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
}
}
}
}
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_avx
;
c
->
vector_fmul_reverse
=
ff_vector_fmul_reverse_avx
;
c
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
#endif
/* HAVE_AVX_EXTERNAL */
#endif
/* HAVE_AVX_EXTERNAL */
}
}
...
...
libavutil/arm/float_dsp_init_neon.c
浏览文件 @
55aa03b9
...
@@ -35,10 +35,14 @@ void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
...
@@ -35,10 +35,14 @@ void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
void
ff_vector_fmul_window_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
void
ff_vector_fmul_add_neon
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
void
ff_float_dsp_init_neon
(
AVFloatDSPContext
*
fdsp
)
{
{
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmul
=
ff_vector_fmul_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_neon
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_neon
;
}
}
libavutil/arm/float_dsp_neon.S
浏览文件 @
55aa03b9
...
@@ -193,3 +193,30 @@ function ff_vector_fmul_window_neon, export=1
...
@@ -193,3 +193,30 @@ function ff_vector_fmul_window_neon, export=1
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
vst1.32
{
d22
,
d23
},[
ip
,:
128
],
r5
pop
{
r4
,
r5
,
pc
}
pop
{
r4
,
r5
,
pc
}
endfunc
endfunc
function
ff_vector_fmul_add_neon
,
export
=
1
ldr
r12
,
[
sp
]
vld1.32
{
q0
-
q1
},
[
r1
,:
128
]!
vld1.32
{
q8
-
q9
},
[
r2
,:
128
]!
vld1.32
{
q2
-
q3
},
[
r3
,:
128
]!
vmul.f32
q10
,
q0
,
q8
vmul.f32
q11
,
q1
,
q9
1
:
vadd.f32
q12
,
q2
,
q10
vadd.f32
q13
,
q3
,
q11
pld
[
r1
,
#
16
]
pld
[
r2
,
#
16
]
pld
[
r3
,
#
16
]
subs
r12
,
r12
,
#
8
beq
2
f
vld1.32
{
q0
},
[
r1
,:
128
]!
vld1.32
{
q8
},
[
r2
,:
128
]!
vmul.f32
q10
,
q0
,
q8
vld1.32
{
q1
},
[
r1
,:
128
]!
vld1.32
{
q9
},
[
r2
,:
128
]!
vmul.f32
q11
,
q1
,
q9
vld1.32
{
q2
-
q3
},
[
r3
,:
128
]!
vst1.32
{
q12
-
q13
},[
r0
,:
128
]!
b
1
b
2
:
vst1.32
{
q12
-
q13
},[
r0
,:
128
]!
bx
lr
endfunc
libavutil/float_dsp.c
浏览文件 @
55aa03b9
...
@@ -71,6 +71,14 @@ static void vector_fmul_window_c(float *dst, const float *src0,
...
@@ -71,6 +71,14 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
}
}
static
void
vector_fmul_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
];
}
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
void
avpriv_float_dsp_init
(
AVFloatDSPContext
*
fdsp
,
int
bit_exact
)
{
{
fdsp
->
vector_fmul
=
vector_fmul_c
;
fdsp
->
vector_fmul
=
vector_fmul_c
;
...
@@ -78,6 +86,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
...
@@ -78,6 +86,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_dmul_scalar
=
vector_dmul_scalar_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
fdsp
->
vector_fmul_window
=
vector_fmul_window_c
;
fdsp
->
vector_fmul_add
=
vector_fmul_add_c
;
#if ARCH_ARM
#if ARCH_ARM
ff_float_dsp_init_arm
(
fdsp
);
ff_float_dsp_init_arm
(
fdsp
);
...
...
libavutil/float_dsp.h
浏览文件 @
55aa03b9
...
@@ -100,6 +100,24 @@ typedef struct AVFloatDSPContext {
...
@@ -100,6 +100,24 @@ typedef struct AVFloatDSPContext {
*/
*/
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
const
float
*
src1
,
const
float
*
win
,
int
len
);
/**
* Calculate the product of two vectors of floats, add a third vector of
* floats and store the result in a vector of floats.
*
* @param dst output vector
* constraints: 32-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* constraints: 32-byte aligned
* @param src2 third input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 16
*/
void
(
*
vector_fmul_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
}
AVFloatDSPContext
;
}
AVFloatDSPContext
;
/**
/**
...
...
libavutil/ppc/float_dsp_altivec.c
浏览文件 @
55aa03b9
...
@@ -69,3 +69,27 @@ void ff_vector_fmul_window_altivec(float *dst, const float *src0,
...
@@ -69,3 +69,27 @@ void ff_vector_fmul_window_altivec(float *dst, const float *src0,
vec_st
(
t1
,
j
,
dst
);
vec_st
(
t1
,
j
,
dst
);
}
}
}
}
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
)
{
int
i
;
vector
float
d
,
s0
,
s1
,
s2
,
t0
,
t1
,
edges
;
vector
unsigned
char
align
=
vec_lvsr
(
0
,
dst
),
mask
=
vec_lvsl
(
0
,
dst
);
for
(
i
=
0
;
i
<
len
-
3
;
i
+=
4
)
{
t0
=
vec_ld
(
0
,
dst
+
i
);
t1
=
vec_ld
(
15
,
dst
+
i
);
s0
=
vec_ld
(
0
,
src0
+
i
);
s1
=
vec_ld
(
0
,
src1
+
i
);
s2
=
vec_ld
(
0
,
src2
+
i
);
edges
=
vec_perm
(
t1
,
t0
,
mask
);
d
=
vec_madd
(
s0
,
s1
,
s2
);
t1
=
vec_perm
(
d
,
edges
,
align
);
t0
=
vec_perm
(
edges
,
d
,
align
);
vec_st
(
t1
,
15
,
dst
+
i
);
vec_st
(
t0
,
0
,
dst
+
i
);
}
}
libavutil/ppc/float_dsp_altivec.h
浏览文件 @
55aa03b9
...
@@ -28,4 +28,8 @@ extern void ff_vector_fmul_window_altivec(float *dst, const float *src0,
...
@@ -28,4 +28,8 @@ extern void ff_vector_fmul_window_altivec(float *dst, const float *src0,
const
float
*
src1
,
const
float
*
win
,
const
float
*
src1
,
const
float
*
win
,
int
len
);
int
len
);
extern
void
ff_vector_fmul_add_altivec
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
#endif
/* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
libavutil/ppc/float_dsp_init.c
浏览文件 @
55aa03b9
...
@@ -32,6 +32,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
...
@@ -32,6 +32,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
return
;
return
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
fdsp
->
vector_fmul
=
ff_vector_fmul_altivec
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_altivec
;
if
(
!
bit_exact
)
{
if
(
!
bit_exact
)
{
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
fdsp
->
vector_fmul_window
=
ff_vector_fmul_window_altivec
;
...
...
libavutil/x86/float_dsp.asm
浏览文件 @
55aa03b9
...
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
...
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
INIT_YMM
avx
INIT_YMM
avx
VECTOR_DMUL_SCALAR
VECTOR_DMUL_SCALAR
%endif
%endif
;-----------------------------------------------------------------------------
; vector_fmul_add(float *dst, const float *src0, const float *src1,
; const float *src2, int len)
;-----------------------------------------------------------------------------
%macro VECTOR_FMUL_ADD 0
cglobal
vector_fmul_add
,
5
,
5
,
2
,
dst
,
src0
,
src1
,
src2
,
len
lea
lenq
,
[
lend
*
4
-
2
*
mmsize
]
ALIGN
16
.loop:
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src0q
+
lenq
+
mmsize
]
mulps
m0
,
m0
,
[
src1q
+
lenq
]
mulps
m1
,
m1
,
[
src1q
+
lenq
+
mmsize
]
addps
m0
,
m0
,
[
src2q
+
lenq
]
addps
m1
,
m1
,
[
src2q
+
lenq
+
mmsize
]
mova
[
dstq
+
lenq
],
m0
mova
[
dstq
+
lenq
+
mmsize
],
m1
sub
lenq
,
2
*
mmsize
jge
.loop
REP_RET
%endmacro
INIT_XMM
sse
VECTOR_FMUL_ADD
INIT_YMM
avx
VECTOR_FMUL_ADD
libavutil/x86/float_dsp_init.c
浏览文件 @
55aa03b9
...
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
...
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
extern
void
ff_vector_dmul_scalar_avx
(
double
*
dst
,
const
double
*
src
,
double
mul
,
int
len
);
double
mul
,
int
len
);
void
ff_vector_fmul_add_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
void
ff_vector_fmul_add_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
len
);
#if HAVE_6REGS && HAVE_INLINE_ASM
#if HAVE_6REGS && HAVE_INLINE_ASM
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
static
void
vector_fmul_window_3dnowext
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
const
float
*
src1
,
const
float
*
win
,
...
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
...
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmul
=
ff_vector_fmul_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
fdsp
->
vector_fmul_scalar
=
ff_vector_fmul_scalar_sse
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_sse
;
}
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_sse2
;
...
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
...
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmul
=
ff_vector_fmul_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_fmac_scalar
=
ff_vector_fmac_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
fdsp
->
vector_dmul_scalar
=
ff_vector_dmul_scalar_avx
;
fdsp
->
vector_fmul_add
=
ff_vector_fmul_add_avx
;
}
}
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录