Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
7b19e76a
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7b19e76a
编写于
8月 15, 2017
作者:
P
Paul B Mahol
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
avfilter/vf_transpose: rewrite for x86 SIMD
Transpose first in chunks of 8x8 blocks. 15% faster overall.
上级
cbd8e070
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
141 addition
and
41 deletion
+141
-41
libavfilter/vf_transpose.c
libavfilter/vf_transpose.c
+141
-41
未找到文件。
libavfilter/vf_transpose.c
浏览文件 @
7b19e76a
...
...
@@ -58,6 +58,12 @@ typedef struct TransContext {
int
passthrough
;
///< PassthroughType, landscape passthrough mode enabled
int
dir
;
///< TransposeDir
void
(
*
transpose_8x8
)(
uint8_t
*
src
,
ptrdiff_t
src_linesize
,
uint8_t
*
dst
,
ptrdiff_t
dst_linesize
);
void
(
*
transpose_block
)(
uint8_t
*
src
,
ptrdiff_t
src_linesize
,
uint8_t
*
dst
,
ptrdiff_t
dst_linesize
,
int
w
,
int
h
);
}
TransContext
;
static
int
query_formats
(
AVFilterContext
*
ctx
)
...
...
@@ -79,6 +85,109 @@ static int query_formats(AVFilterContext *ctx)
return
ff_set_common_formats
(
ctx
,
pix_fmts
);
}
/**
 * Transpose a w x h block of 1-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y, i.e. dst[y][x] = src[x][y].
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_8_c(uint8_t *src, ptrdiff_t src_linesize,
                                       uint8_t *dst, ptrdiff_t dst_linesize,
                                       int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" becomes destination row "row". */
        const uint8_t *in  = src + row;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++)
            out[col] = in[col * src_linesize];
    }
}
/**
 * Transpose one 8x8 block of 1-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_8_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_8_c(uint8_t *src, ptrdiff_t src_linesize,
                              uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_8_c(src, src_linesize, dst, dst_linesize,
                        BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a w x h block of 2-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y. Pixels are copied as whole 16-bit words.
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_16_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" (2 bytes wide) becomes destination row "row". */
        const uint8_t *in  = src + 2 * row;
        uint16_t      *out = (uint16_t *)(dst + row * dst_linesize);

        for (col = 0; col < w; col++)
            out[col] = *(const uint16_t *)(in + col * src_linesize);
    }
}
/**
 * Transpose one 8x8 block of 2-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_16_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_16_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_16_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a w x h block of 3-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y. Each pixel is moved with AV_RB24()/AV_WB24().
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_24_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" (3 bytes wide) becomes destination row "row". */
        const uint8_t *in  = src + row * 3;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            int32_t pixel = AV_RB24(in + col * src_linesize);
            AV_WB24(out + 3 * col, pixel);
        }
    }
}
/**
 * Transpose one 8x8 block of 3-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_24_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_24_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_24_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a w x h block of 4-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y. Pixels are copied as whole 32-bit words.
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_32_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" (4 bytes wide) becomes destination row "row". */
        const uint8_t *in  = src + 4 * row;
        uint32_t      *out = (uint32_t *)(dst + row * dst_linesize);

        for (col = 0; col < w; col++)
            out[col] = *(const uint32_t *)(in + col * src_linesize);
    }
}
/**
 * Transpose one 8x8 block of 4-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_32_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_32_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_32_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a w x h block of 6-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y. Each pixel is moved with AV_RB48()/AV_WB48().
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_48_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" (6 bytes wide) becomes destination row "row". */
        const uint8_t *in  = src + 6 * row;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            int64_t pixel = AV_RB48(in + col * src_linesize);
            AV_WB48(out + 6 * col, pixel);
        }
    }
}
/**
 * Transpose one 8x8 block of 6-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_48_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_48_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_48_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a w x h block of 8-byte pixels.
 *
 * Writes h output rows of w pixels each. Output pixel (x, y) is taken from
 * input row x, column y. Pixels are copied as whole 64-bit words.
 *
 * @param src          first byte of the source block
 * @param src_linesize byte distance between consecutive source rows
 * @param dst          first byte of the destination block
 * @param dst_linesize byte distance between consecutive destination rows
 * @param w            number of pixels written per destination row
 * @param h            number of destination rows written
 */
static inline void transpose_block_64_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Source column "row" (8 bytes wide) becomes destination row "row". */
        const uint8_t *in  = src + 8 * row;
        uint64_t      *out = (uint64_t *)(dst + row * dst_linesize);

        for (col = 0; col < w; col++)
            out[col] = *(const uint64_t *)(in + col * src_linesize);
    }
}
/**
 * Transpose one 8x8 block of 8-byte pixels.
 *
 * Fixed-size entry point that forwards to transpose_block_64_c() with both
 * block dimensions set to 8.
 */
static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_64_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
static
int
config_props_output
(
AVFilterLink
*
outlink
)
{
AVFilterContext
*
ctx
=
outlink
->
src
;
...
...
@@ -118,6 +227,21 @@ static int config_props_output(AVFilterLink *outlink)
else
outlink
->
sample_aspect_ratio
=
inlink
->
sample_aspect_ratio
;
switch
(
s
->
pixsteps
[
0
])
{
case
1
:
s
->
transpose_block
=
transpose_block_8_c
;
s
->
transpose_8x8
=
transpose_8x8_8_c
;
break
;
case
2
:
s
->
transpose_block
=
transpose_block_16_c
;
s
->
transpose_8x8
=
transpose_8x8_16_c
;
break
;
case
3
:
s
->
transpose_block
=
transpose_block_24_c
;
s
->
transpose_8x8
=
transpose_8x8_24_c
;
break
;
case
4
:
s
->
transpose_block
=
transpose_block_32_c
;
s
->
transpose_8x8
=
transpose_8x8_32_c
;
break
;
case
6
:
s
->
transpose_block
=
transpose_block_48_c
;
s
->
transpose_8x8
=
transpose_8x8_48_c
;
break
;
case
8
:
s
->
transpose_block
=
transpose_block_64_c
;
s
->
transpose_8x8
=
transpose_8x8_64_c
;
break
;
}
av_log
(
ctx
,
AV_LOG_VERBOSE
,
"w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d
\n
"
,
inlink
->
w
,
inlink
->
h
,
s
->
dir
,
outlink
->
w
,
outlink
->
h
,
...
...
@@ -176,49 +300,25 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
dstlinesize
*=
-
1
;
}
switch
(
pixstep
)
{
case
1
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
for
(
x
=
0
;
x
<
outw
;
x
++
)
dst
[
x
]
=
src
[
x
*
srclinesize
+
y
];
break
;
case
2
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint16_t
*
)(
dst
+
2
*
x
))
=
*
((
uint16_t
*
)(
src
+
x
*
srclinesize
+
y
*
2
));
}
break
;
case
3
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
{
int32_t
v
=
AV_RB24
(
src
+
x
*
srclinesize
+
y
*
3
);
AV_WB24
(
dst
+
3
*
x
,
v
);
}
}
break
;
case
4
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint32_t
*
)(
dst
+
4
*
x
))
=
*
((
uint32_t
*
)(
src
+
x
*
srclinesize
+
y
*
4
));
for
(
y
=
start
;
y
<
end
-
7
;
y
+=
8
)
{
for
(
x
=
0
;
x
<
outw
-
7
;
x
+=
8
)
{
s
->
transpose_8x8
(
src
+
x
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
x
*
pixstep
,
dstlinesize
);
}
break
;
case
6
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
{
int64_t
v
=
AV_RB48
(
src
+
x
*
srclinesize
+
y
*
6
);
AV_WB48
(
dst
+
6
*
x
,
v
);
}
}
break
;
case
8
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint64_t
*
)(
dst
+
8
*
x
))
=
*
((
uint64_t
*
)(
src
+
x
*
srclinesize
+
y
*
8
));
}
break
;
if
(
outw
-
x
>
0
&&
end
-
y
>
0
)
s
->
transpose_block
(
src
+
x
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
x
*
pixstep
,
dstlinesize
,
outw
-
x
,
end
-
y
);
}
if
(
end
-
y
>
0
)
s
->
transpose_block
(
src
+
0
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
0
*
pixstep
,
dstlinesize
,
outw
,
end
-
y
);
}
return
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录