Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
058bbf48
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
058bbf48
编写于
9月 03, 2019
作者:
P
Paul B Mahol
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
avfilter/vf_v360: x86 SIMD for interpolations
上级
f0d8005e
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
394 addition
and
155 deletion
+394
-155
libavfilter/v360.h
libavfilter/v360.h
+113
-0
libavfilter/vf_v360.c
libavfilter/vf_v360.c
+87
-155
libavfilter/x86/Makefile
libavfilter/x86/Makefile
+2
-0
libavfilter/x86/vf_v360.asm
libavfilter/x86/vf_v360.asm
+142
-0
libavfilter/x86/vf_v360_init.c
libavfilter/x86/vf_v360_init.c
+50
-0
未找到文件。
libavfilter/v360.h
0 → 100644
浏览文件 @
058bbf48
/*
* Copyright (c) 2019 Eugene Lyapustin
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_V360_H
#define AVFILTER_V360_H
#include "avfilter.h"
/**
 * Projection identifiers.
 *
 * Enumerators are numbered consecutively from zero, so NB_PROJECTIONS, which
 * must stay last, equals the number of projections.
 */
enum Projections {
    EQUIRECTANGULAR = 0,
    CUBEMAP_3_2,
    CUBEMAP_6_1,
    EQUIANGULAR,
    FLAT,
    DUAL_FISHEYE,
    BARREL,
    CUBEMAP_1_6,
    NB_PROJECTIONS,     ///< number of projections, not a projection itself
};
/**
 * Interpolation method identifiers.
 *
 * Enumerators are numbered consecutively from zero, so NB_INTERP_METHODS,
 * which must stay last, equals the number of methods.
 */
enum InterpMethod {
    NEAREST = 0,
    BILINEAR,
    BICUBIC,
    LANCZOS,
    NB_INTERP_METHODS,  ///< number of methods, not a method itself
};
/**
 * Face position identifiers.
 *
 * Enumerators are numbered consecutively from zero, so NB_FACES, which must
 * stay last, equals the number of face positions.
 */
enum Faces {
    TOP_LEFT = 0,
    TOP_MIDDLE,
    TOP_RIGHT,
    BOTTOM_LEFT,
    BOTTOM_MIDDLE,
    BOTTOM_RIGHT,
    NB_FACES,           ///< number of face positions, not a position itself
};
/**
 * Axis direction identifiers.
 *
 * Enumerators are numbered consecutively from zero, so NB_DIRECTIONS, which
 * must stay last, equals the number of directions.
 */
enum Direction {
    RIGHT = 0,          ///< Axis +X
    LEFT,               ///< Axis -X
    UP,                 ///< Axis +Y
    DOWN,               ///< Axis -Y
    FRONT,              ///< Axis -Z
    BACK,               ///< Axis +Z
    NB_DIRECTIONS,      ///< number of directions, not a direction itself
};
/**
 * Rotation identifiers.
 *
 * Enumerators are numbered consecutively from zero, so NB_ROTATIONS, which
 * must stay last, equals the number of rotations.
 */
enum Rotation {
    ROT_0 = 0,
    ROT_90,
    ROT_180,
    ROT_270,
    NB_ROTATIONS,       ///< number of rotations, not a rotation itself
};
/**
 * Private context of the v360 filter.
 *
 * Fields without a comment are not described here because their use is not
 * visible from this header.
 */
typedef struct V360Context {
    const AVClass *class;

    int in;                             ///< input projection, one of enum Projections
    int out;                            ///< NOTE(review): presumably the output projection (enum Projections) - confirm
    int interp;                         ///< interpolation method, one of enum InterpMethod
    int width;
    int height;

    char *in_forder;
    char *out_forder;
    char *in_frot;
    char *out_frot;

    int in_cubemap_face_order[6];
    int out_cubemap_direction_order[6];
    int in_cubemap_face_rotation[6];
    int out_cubemap_face_rotation[6];

    float in_pad;
    float out_pad;

    float yaw;
    float pitch;
    float roll;

    int h_flip;
    int v_flip;
    int d_flip;

    float h_fov;
    float v_fov;
    float flat_range[3];

    int planewidth[4];                  ///< per-plane output width used by the remap slice functions
    int planeheight[4];                 ///< per-plane output height used by the remap slice functions
    int inplanewidth[4];
    int inplaneheight[4];
    int nb_planes;                      ///< number of planes iterated by the remap slice functions

    uint16_t *u[4];                     ///< per-plane source column index tables
    uint16_t *v[4];                     ///< per-plane source row index tables
    int16_t  *ker[4];                   ///< per-plane interpolation weight tables

    /** Per-slice worker; its signature matches the generated remapN_Mbit_slice() functions. */
    int (*remap_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);

    /**
     * Remap one output line.
     * Installed by ff_v360_init() and possibly replaced by ff_v360_init_x86().
     */
    void (*remap_line)(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                       const uint16_t *u, const uint16_t *v, const int16_t *ker);
} V360Context;
/**
 * Choose the C implementation of V360Context.remap_line from s->interp and
 * depth (8-bit variants when depth <= 8, 16-bit variants otherwise), then
 * call ff_v360_init_x86() when ARCH_X86_64 is set.
 *
 * @param s     filter context; s->interp must already be set
 * @param depth bit depth used to choose between the 8-bit and 16-bit variants
 */
void ff_v360_init(V360Context *s, int depth);

/**
 * On x86-64 builds, replace s->remap_line with an AVX2 routine when
 * EXTERNAL_AVX2_FAST() accepts the CPU flags and depth <= 8; otherwise
 * s->remap_line is left untouched.
 *
 * @param s     filter context; s->interp selects the routine
 * @param depth bit depth; only depth <= 8 has SIMD variants
 */
void ff_v360_init_x86(V360Context *s, int depth);
#endif
/* AVFILTER_V360_H */
libavfilter/vf_v360.c
浏览文件 @
058bbf48
...
...
@@ -41,88 +41,7 @@
#include "formats.h"
#include "internal.h"
#include "video.h"
enum
Projections
{
EQUIRECTANGULAR
,
CUBEMAP_3_2
,
CUBEMAP_6_1
,
EQUIANGULAR
,
FLAT
,
DUAL_FISHEYE
,
BARREL
,
CUBEMAP_1_6
,
NB_PROJECTIONS
,
};
enum
InterpMethod
{
NEAREST
,
BILINEAR
,
BICUBIC
,
LANCZOS
,
NB_INTERP_METHODS
,
};
enum
Faces
{
TOP_LEFT
,
TOP_MIDDLE
,
TOP_RIGHT
,
BOTTOM_LEFT
,
BOTTOM_MIDDLE
,
BOTTOM_RIGHT
,
NB_FACES
,
};
enum
Direction
{
RIGHT
,
///< Axis +X
LEFT
,
///< Axis -X
UP
,
///< Axis +Y
DOWN
,
///< Axis -Y
FRONT
,
///< Axis -Z
BACK
,
///< Axis +Z
NB_DIRECTIONS
,
};
enum
Rotation
{
ROT_0
,
ROT_90
,
ROT_180
,
ROT_270
,
NB_ROTATIONS
,
};
typedef
struct
V360Context
{
const
AVClass
*
class
;
int
in
,
out
;
int
interp
;
int
width
,
height
;
char
*
in_forder
;
char
*
out_forder
;
char
*
in_frot
;
char
*
out_frot
;
int
in_cubemap_face_order
[
6
];
int
out_cubemap_direction_order
[
6
];
int
in_cubemap_face_rotation
[
6
];
int
out_cubemap_face_rotation
[
6
];
float
in_pad
,
out_pad
;
float
yaw
,
pitch
,
roll
;
int
h_flip
,
v_flip
,
d_flip
;
float
h_fov
,
v_fov
;
float
flat_range
[
3
];
int
planewidth
[
4
],
planeheight
[
4
];
int
inplanewidth
[
4
],
inplaneheight
[
4
];
int
nb_planes
;
uint16_t
*
u
[
4
],
*
v
[
4
];
int16_t
*
ker
[
4
];
int
(
*
remap_slice
)(
AVFilterContext
*
ctx
,
void
*
arg
,
int
jobnr
,
int
nb_jobs
);
}
V360Context
;
#include "v360.h"
typedef
struct
ThreadData
{
AVFrame
*
in
;
...
...
@@ -251,47 +170,22 @@ static int query_formats(AVFilterContext *ctx)
return
ff_set_common_formats
(
ctx
,
fmts_list
);
}
/**
* Generate no-interpolation remapping function with a given pixel depth.
*
* @param bits number of bits per pixel
* @param div number of bytes per pixel
*/
#define DEFINE_REMAP1(bits, div) \
static int remap1_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
ThreadData *td = (ThreadData*)arg; \
const V360Context *s = ctx->priv; \
const AVFrame *in = td->in; \
AVFrame *out = td->out; \
\
int plane, x, y; \
\
for (plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane] / div; \
const int out_linesize = out->linesize[plane] / div; \
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
const int width = s->planewidth[plane]; \
const int height = s->planeheight[plane]; \
\
const int slice_start = (height * jobnr ) / nb_jobs; \
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
\
for (y = slice_start; y < slice_end; y++) { \
const uint16_t *u = s->u[plane] + y * width; \
const uint16_t *v = s->v[plane] + y * width; \
uint##bits##_t *d = dst + y * out_linesize; \
for (x = 0; x < width; x++) \
*d++ = src[v[x] * in_linesize + u[x]]; \
} \
} \
\
return 0; \
/**
 * Generate the no-interpolation line remapper remap1_<bits>bit_line_c().
 *
 * The generated function copies, for every x in [0, width), the source
 * sample at row v[x], column u[x] to dst[x]. The ker argument exists only to
 * match the remap_line signature and is not read.
 *
 * @param bits sample width in bits; selects the uint<bits>_t sample type
 * @param div  divisor converting the byte line size into a sample stride
 */
#define DEFINE_REMAP1_LINE(bits, div)                                              \
static void remap1_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src, \
                                      ptrdiff_t in_linesize,                       \
                                      const uint16_t *u, const uint16_t *v,        \
                                      const int16_t *ker)                          \
{                                                                                  \
    typedef uint##bits##_t pixel;                                                  \
    const pixel *in_px     = (const pixel *)src;                                   \
    pixel       *out_px    = (pixel *)dst;                                         \
    const ptrdiff_t stride = in_linesize / div;                                    \
    int x = 0;                                                                     \
                                                                                   \
    while (x < width) {                                                            \
        out_px[x] = in_px[v[x] * stride + u[x]];                                   \
        x++;                                                                       \
    }                                                                              \
}
DEFINE_REMAP1
(
8
,
1
)
DEFINE_REMAP1
(
16
,
2
)
/* Instantiate remap1_8bit_line_c() and remap1_16bit_line_c(). */
DEFINE_REMAP1_LINE( 8, 1)
DEFINE_REMAP1_LINE(16, 2)
typedef
struct
XYRemap
{
uint16_t
u
[
4
][
4
];
...
...
@@ -304,9 +198,8 @@ typedef struct XYRemap {
*
* @param ws size of interpolation window
* @param bits number of bits per pixel
* @param div number of bytes per pixel
*/
#define DEFINE_REMAP(ws, bits
, div)
\
#define DEFINE_REMAP(ws, bits
)
\
static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
ThreadData *td = (ThreadData*)arg; \
...
...
@@ -314,48 +207,85 @@ static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jo
const AVFrame *in = td->in; \
AVFrame *out = td->out; \
\
int plane, x, y, i, j; \
\
for (plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane] / div; \
const int out_linesize = out->linesize[plane] / div; \
const uint##bits##_t *src = (const uint##bits##_t *)in->data[plane]; \
uint##bits##_t *dst = (uint##bits##_t *)out->data[plane]; \
for (int plane = 0; plane < s->nb_planes; plane++) { \
const int in_linesize = in->linesize[plane]; \
const int out_linesize = out->linesize[plane]; \
const uint8_t *src = in->data[plane]; \
uint8_t *dst = out->data[plane]; \
const int width = s->planewidth[plane]; \
const int height = s->planeheight[plane]; \
\
const int slice_start = (height * jobnr ) / nb_jobs; \
const int slice_end = (height * (jobnr + 1)) / nb_jobs; \
\
for (y = slice_start; y < slice_end; y++) { \
uint##bits##_t *d = dst + y * out_linesize; \
for (int y = slice_start; y < slice_end; y++) { \
const uint16_t *u = s->u[plane] + y * width * ws * ws; \
const uint16_t *v = s->v[plane] + y * width * ws * ws; \
const int16_t *ker = s->ker[plane] + y * width * ws * ws; \
for (x = 0; x < width; x++) { \
const uint16_t *uu = u + x * ws * ws; \
const uint16_t *vv = v + x * ws * ws; \
const int16_t *kker = ker + x * ws * ws; \
int tmp = 0; \
\
for (i = 0; i < ws; i++) { \
for (j = 0; j < ws; j++) { \
tmp += kker[i * ws + j] * src[vv[i * ws + j] * in_linesize + uu[i * ws + j]]; \
} \
} \
\
*d++ = av_clip_uint##bits(tmp >> (15 - ws)); \
} \
s->remap_line(dst + y * out_linesize, width, src, in_linesize, u, v, ker); \
} \
} \
\
return 0; \
}
DEFINE_REMAP
(
2
,
8
,
1
)
DEFINE_REMAP
(
4
,
8
,
1
)
DEFINE_REMAP
(
2
,
16
,
2
)
DEFINE_REMAP
(
4
,
16
,
2
)
/*
 * Instantiate the slice workers remap<ws>_<bits>bit_slice() for window sizes
 * 1, 2 and 4 at 8 and 16 bits.
 */
DEFINE_REMAP(1,  8)
DEFINE_REMAP(2,  8)
DEFINE_REMAP(4,  8)
DEFINE_REMAP(1, 16)
DEFINE_REMAP(2, 16)
DEFINE_REMAP(4, 16)
/**
 * Generate the interpolating line remapper remap<ws>_<bits>bit_line_c().
 *
 * Every output sample owns ws * ws consecutive entries in u, v and ker. The
 * generated function sums weight * source sample over those entries, shifts
 * the sum right by 14 bits and clips it with av_clip_uint<bits>().
 *
 * @param ws   interpolation window size (ws x ws taps per output sample)
 * @param bits sample width in bits; selects the uint<bits>_t sample type
 * @param div  divisor converting the byte line size into a sample stride
 */
#define DEFINE_REMAP_LINE(ws, bits, div)                                                \
static void remap##ws##_##bits##bit_line_c(uint8_t *dst, int width, const uint8_t *src, \
                                           ptrdiff_t in_linesize,                       \
                                           const uint16_t *u, const uint16_t *v,        \
                                           const int16_t *ker)                          \
{                                                                                       \
    typedef uint##bits##_t pixel;                                                       \
    const pixel *in_px     = (const pixel *)src;                                        \
    pixel       *out_px    = (pixel *)dst;                                              \
    const ptrdiff_t stride = in_linesize / div;                                         \
    const int taps         = ws * ws;                                                   \
                                                                                        \
    for (int x = 0; x < width; x++, u += taps, v += taps, ker += taps) {                \
        int acc = 0;                                                                    \
                                                                                        \
        for (int k = 0; k < taps; k++)                                                  \
            acc += ker[k] * in_px[v[k] * stride + u[k]];                                \
                                                                                        \
        out_px[x] = av_clip_uint##bits(acc >> 14);                                      \
    }                                                                                   \
}

/* Instantiate the 2x2 and 4x4 window variants for 8-bit and 16-bit samples. */
DEFINE_REMAP_LINE(2,  8, 1)
DEFINE_REMAP_LINE(4,  8, 1)
DEFINE_REMAP_LINE(2, 16, 2)
DEFINE_REMAP_LINE(4, 16, 2)
/**
 * Install the C line remapper that matches s->interp and depth, then give
 * the x86 initialiser a chance to override it.
 *
 * NEAREST selects the remap1 functions, BILINEAR the remap2 functions, and
 * BICUBIC and LANCZOS share the remap4 functions. Any other s->interp value
 * leaves s->remap_line unchanged.
 *
 * @param s     filter context whose remap_line pointer is set
 * @param depth bit depth; values <= 8 select the 8-bit variants, larger
 *              values the 16-bit variants
 */
void ff_v360_init(V360Context *s, int depth)
{
    const int is_8bit = depth <= 8;

    if (s->interp == NEAREST) {
        if (is_8bit)
            s->remap_line = remap1_8bit_line_c;
        else
            s->remap_line = remap1_16bit_line_c;
    } else if (s->interp == BILINEAR) {
        if (is_8bit)
            s->remap_line = remap2_8bit_line_c;
        else
            s->remap_line = remap2_16bit_line_c;
    } else if (s->interp == BICUBIC || s->interp == LANCZOS) {
        if (is_8bit)
            s->remap_line = remap4_8bit_line_c;
        else
            s->remap_line = remap4_16bit_line_c;
    }

    if (ARCH_X86_64)
        ff_v360_init_x86(s, depth);
}
/**
* Save nearest pixel coordinates for remapping.
...
...
@@ -399,10 +329,10 @@ static void bilinear_kernel(float du, float dv, const XYRemap *r_tmp,
}
}
ker
[
0
]
=
(
1
.
f
-
du
)
*
(
1
.
f
-
dv
)
*
8192
;
ker
[
1
]
=
du
*
(
1
.
f
-
dv
)
*
8192
;
ker
[
2
]
=
(
1
.
f
-
du
)
*
dv
*
8192
;
ker
[
3
]
=
du
*
dv
*
8192
;
ker
[
0
]
=
(
1
.
f
-
du
)
*
(
1
.
f
-
dv
)
*
16384
;
ker
[
1
]
=
du
*
(
1
.
f
-
dv
)
*
16384
;
ker
[
2
]
=
(
1
.
f
-
du
)
*
dv
*
16384
;
ker
[
3
]
=
du
*
dv
*
16384
;
}
/**
...
...
@@ -446,7 +376,7 @@ static void bicubic_kernel(float du, float dv, const XYRemap *r_tmp,
for
(
j
=
0
;
j
<
4
;
j
++
)
{
u
[
i
*
4
+
j
]
=
r_tmp
->
u
[
i
][
j
];
v
[
i
*
4
+
j
]
=
r_tmp
->
v
[
i
][
j
];
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
2048
;
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
16384
;
}
}
}
...
...
@@ -501,7 +431,7 @@ static void lanczos_kernel(float du, float dv, const XYRemap *r_tmp,
for
(
j
=
0
;
j
<
4
;
j
++
)
{
u
[
i
*
4
+
j
]
=
r_tmp
->
u
[
i
][
j
];
v
[
i
*
4
+
j
]
=
r_tmp
->
v
[
i
][
j
];
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
2048
;
ker
[
i
*
4
+
j
]
=
du_coeffs
[
j
]
*
dv_coeffs
[
i
]
*
16384
;
}
}
}
...
...
@@ -2038,6 +1968,8 @@ static int config_output(AVFilterLink *outlink)
av_assert0
(
0
);
}
ff_v360_init
(
s
,
depth
);
switch
(
s
->
in
)
{
case
EQUIRECTANGULAR
:
in_transform
=
xyz_to_equirect
;
...
...
libavfilter/x86/Makefile
浏览文件 @
058bbf48
...
...
@@ -31,6 +31,7 @@ OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_THRESHOLD_FILTER)
+=
x86/vf_threshold_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume_init.o
OBJS-$(CONFIG_V360_FILTER)
+=
x86/vf_v360_init.o
OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif_init.o
...
...
@@ -66,5 +67,6 @@ X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER)
+=
x86/vf_threshold.o
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_interlace.o
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume.o
X86ASM-OBJS-$(CONFIG_V360_FILTER)
+=
x86/vf_v360.o
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif.o
X86ASM-OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
libavfilter/x86/vf_v360.asm
0 → 100644
浏览文件 @
058bbf48
;*****************************************************************************
;* x86-optimized functions for v360 filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
%include "libavutil/x86/x86util.asm"
SECTION_RODATA

; pshufb control: selects bytes 0, 4, 8 and 12 (the low byte of each dword);
; the -1 entries make pshufb write zero to the remaining destination bytes.
pb_mask: db 0,4,8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
; dword mask keeping only the low byte of each dword lane
pd_255:  times 4 dd 255

SECTION .text
; void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; No-interpolation remap of one 8-bit line:
;     dst[x] = src[v[x] * in_linesize + u[x]]
; Only the first six arguments are loaded; ker is not used.
; Each iteration handles mmsize/4 = 8 pixels.
;
; NOTE(review): u/v are declared uint16_t in C but widened with pmovsxwd
;               (sign extension); values >= 0x8000 would turn negative - confirm
;               the coordinate range.
; NOTE(review): the loop always loads and stores 8 pixels, so the last
;               iteration goes past width when width is not a multiple of 8 -
;               presumably covered by buffer padding; confirm.
; NOTE(review): vpgatherdd fetches a full dword at each byte address, i.e. up
;               to 3 bytes beyond the addressed pixel - confirm src padding.
INIT_YMM avx2
cglobal remap1_8bit_line, 6, 7, 6, dst, width, src, in_linesize, u, v, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized        ; low 32 bits of in_linesize
    pcmpeqw         m4, m4                  ; all ones: template for the gather mask
    VBROADCASTI128  m3, [pb_mask]
    vpbroadcastd    m0, xm0                 ; in_linesize in every dword lane

    .loop:
        pmovsxwd   m1, [vq + xq * 2]        ; 8 row indices
        pmovsxwd   m2, [uq + xq * 2]        ; 8 column indices

        pmulld           m1, m0
        paddd            m1, m2             ; byte offsets v * in_linesize + u
        mova             m2, m4             ; gather clears its mask, so reload it
        vpgatherdd       m5, [srcq + m1], m2
        pshufb           m1, m5, m3         ; pack the low byte of each dword per 128-bit lane
        vextracti128    xm2, m1, 1
        movd      [dstq+xq], xm1            ; pixels x .. x+3
        movd    [dstq+xq+4], xm2            ; pixels x+4 .. x+7

        add   xq, mmsize / 4
        cmp   xq, widthq
        jl .loop
    RET
; void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; 2x2-window remap of one 8-bit line. Every pixel owns four consecutive
; entries in u, v and ker; the output is the sum of ker[k] * src sample over
; those four entries, shifted right by 14.
; Each iteration handles mmsize/16 = 2 pixels, one per 128-bit lane.
;
; NOTE(review): the result byte is stored with pextrb without saturation,
;               whereas the C version clips with av_clip_uint8() - confirm the
;               weighted sum can never leave the 8-bit range here.
; NOTE(review): dst[x+1] is written on every iteration, also when x+1 == width
;               for odd widths - presumably covered by buffer padding; confirm.
; NOTE(review): u/v are declared uint16_t in C but widened with pmovsxwd
;               (sign extension) - confirm the coordinate range.
INIT_YMM avx2
cglobal remap2_8bit_line, 7, 8, 8, dst, width, src, in_linesize, u, v, ker, x
    movsxdifnidn widthq, widthd
    xor             xq, xq
    movd           xm0, in_linesized        ; low 32 bits of in_linesize
    pcmpeqw         m7, m7                  ; all ones: template for the gather mask
    vpbroadcastd    m0, xm0                 ; in_linesize in every dword lane
    vpbroadcastd    m6, [pd_255]

    .loop:
        pmovsxwd   m1, [kerq + xq * 8]      ; 8 weights  (2 pixels x 4 taps)
        pmovsxwd   m2, [vq + xq * 8]        ; 8 row indices
        pmovsxwd   m3, [uq + xq * 8]        ; 8 column indices

        pmulld          m4, m2, m0
        paddd           m4, m3              ; byte offsets v * in_linesize + u
        mova            m3, m7              ; gather clears its mask, so reload it
        vpgatherdd      m2, [srcq + m4], m3
        pand            m2, m6              ; keep only the addressed byte of each dword
        pmulld          m2, m1              ; weight * sample
        phaddd          m2, m2
        phaddd          m1, m2, m2          ; per-lane sum of the four taps
        psrld           m1, m1, 0xe         ; >> 14
        vextracti128   xm2, m1, 1

        pextrb   [dstq+xq], xm1, 0          ; pixel x
        pextrb [dstq+xq+1], xm2, 0          ; pixel x+1

        add   xq, mmsize / 16
        cmp   xq, widthq
        jl .loop
    RET
; void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
;                               const uint16_t *u, const uint16_t *v, const int16_t *ker);
;
; 4x4-window remap of one 8-bit line. Every pixel owns sixteen consecutive
; entries in u, v and ker; the output is the sum of ker[k] * src sample over
; those sixteen entries, shifted right by 14 and packed with packuswb.
; One pixel is produced per iteration. x counts pixels and y is the byte
; offset into u/v/ker, advancing by 32 bytes (16 words) per pixel.
;
; NOTE(review): the shift is logical (psrld) while the C version uses >> on a
;               signed int before clipping - confirm negative sums end up
;               clamped to 0 as intended.
; NOTE(review): u/v are declared uint16_t in C but widened with pmovsxwd
;               (sign extension) - confirm the coordinate range.
INIT_YMM avx2
cglobal remap4_8bit_line, 7, 9, 11, dst, width, src, in_linesize, u, v, ker, x, y
    movsxdifnidn widthq, widthd
    xor             yq, yq
    xor             xq, xq
    movd           xm0, in_linesized        ; low 32 bits of in_linesize
    pcmpeqw         m7, m7                  ; all ones: template for the gather mask
    vpbroadcastd    m0, xm0                 ; in_linesize in every dword lane
    vpbroadcastd    m6, [pd_255]

    .loop:
        pmovsxwd   m1, [kerq + yq]          ; weights, taps 0..7
        pmovsxwd   m5, [kerq + yq + 16]     ; weights, taps 8..15
        pmovsxwd   m2, [vq + yq]            ; row indices, taps 0..7
        pmovsxwd   m8, [vq + yq + 16]       ; row indices, taps 8..15
        pmovsxwd   m3, [uq + yq]            ; column indices, taps 0..7
        pmovsxwd   m9, [uq + yq + 16]       ; column indices, taps 8..15

        pmulld          m4, m2, m0
        pmulld         m10, m8, m0
        paddd           m4, m3              ; byte offsets, taps 0..7
        paddd          m10, m9              ; byte offsets, taps 8..15
        mova            m3, m7              ; gather clears its mask, so reload it
        vpgatherdd      m2, [srcq + m4], m3
        mova            m3, m7
        vpgatherdd      m4, [srcq + m10], m3
        pand            m2, m6              ; keep only the addressed byte of each dword
        pand            m4, m6
        pmulld          m2, m1              ; weight * sample
        pmulld          m4, m5

        paddd           m2, m4
        vextracti128   xm1, m2, 1
        paddd           m1, m2              ; fold the upper lane onto the lower one
        phaddd          m1, m1
        phaddd          m1, m1              ; dword 0 = sum of all 16 taps
        psrld           m1, m1, 0xe         ; >> 14
        packuswb        m1, m1

        pextrb   [dstq+xq], xm1, 0

        add   xq, 1
        add   yq, 32
        cmp   xq, widthq
        jl .loop
    RET
%endif
libavfilter/x86/vf_v360_init.c
0 → 100644
浏览文件 @
058bbf48
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/v360.h"
/*
 * AVX2 line remappers. All three share the V360Context.remap_line signature
 * and are installed by ff_v360_init_x86().
 */

/* Installed for NEAREST interpolation when depth <= 8. */
void ff_remap1_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v, const int16_t *ker);

/* Installed for BILINEAR interpolation when depth <= 8. */
void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v, const int16_t *ker);

/* Installed for BICUBIC and LANCZOS interpolation when depth <= 8. */
void ff_remap4_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
                              const uint16_t *u, const uint16_t *v, const int16_t *ker);
/**
 * Override s->remap_line with an AVX2 routine where one exists.
 *
 * Nothing is changed unless the build targets x86-64, EXTERNAL_AVX2_FAST()
 * accepts the runtime CPU flags and depth <= 8. NEAREST maps to the remap1
 * routine, BILINEAR to remap2, and BICUBIC and LANCZOS share remap4. Any
 * other s->interp value leaves s->remap_line unchanged.
 *
 * @param s     filter context whose remap_line pointer may be replaced
 * @param depth bit depth; only depth <= 8 has SIMD variants
 */
av_cold void ff_v360_init_x86(V360Context *s, int depth)
{
#if ARCH_X86_64
    const int cpu_flags = av_get_cpu_flags();

    if (!EXTERNAL_AVX2_FAST(cpu_flags) || depth > 8)
        return;

    switch (s->interp) {
    case NEAREST:
        s->remap_line = ff_remap1_8bit_line_avx2;
        break;
    case BILINEAR:
        s->remap_line = ff_remap2_8bit_line_avx2;
        break;
    case BICUBIC:
    case LANCZOS:
        s->remap_line = ff_remap4_8bit_line_avx2;
        break;
    }
#endif
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录