Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
f4135968
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f4135968
编写于
3月 20, 2019
作者:
S
Sayed Adel
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
core:vsx Add support for VSX3 half precision conversions
上级
6c862fae
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
82 addition
and
15 deletion
+82
-15
cmake/OpenCVCompilerOptions.cmake
cmake/OpenCVCompilerOptions.cmake
+8
-1
cmake/checks/cpu_vsx_asm.cpp
cmake/checks/cpu_vsx_asm.cpp
+21
-0
modules/core/CMakeLists.txt
modules/core/CMakeLists.txt
+1
-1
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+50
-13
modules/core/include/opencv2/core/vsx_utils.hpp
modules/core/include/opencv2/core/vsx_utils.hpp
+2
-0
未找到文件。
cmake/OpenCVCompilerOptions.cmake
浏览文件 @
f4135968
...
...
@@ -294,11 +294,18 @@ endif()
# workaround gcc bug for aligned ld/st
# https://github.com/opencv/opencv/issues/13211
# Run the aligned load/store probe once, natively on PPC64LE (or whenever the
# check is forced through OPENCV_FORCE_COMPILER_CHECK_VSX_ALIGNED). When the
# probe fails, CV_COMPILER_VSX_BROKEN_ALIGNED is defined for the whole build.
if((PPC64LE AND NOT CMAKE_CROSSCOMPILING) OR OPENCV_FORCE_COMPILER_CHECK_VSX_ALIGNED)
  # The result variable is passed once; issuing the same check twice with a
  # quoted and an unquoted name is redundant because both name the same variable.
  ocv_check_runtime_flag("${CPU_BASELINE_FLAGS}" OPENCV_CHECK_VSX_ALIGNED "${OpenCV_SOURCE_DIR}/cmake/checks/runtime/cpu_vsx_aligned.cpp")
  if(NOT OPENCV_CHECK_VSX_ALIGNED)
    add_extra_compiler_option_force(-DCV_COMPILER_VSX_BROKEN_ALIGNED)
  endif()
endif()
# validate inline asm that uses the %x<n> register-number modifier with constraints wa, wd, wf
# Compile cmake/checks/cpu_vsx_asm.cpp with the baseline CPU flags on PPC64LE.
# If the probe does not compile, define CV_COMPILER_VSX_BROKEN_ASM for the build.
if(PPC64LE)
  ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" OPENCV_CHECK_VSX_ASM "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx_asm.cpp")
  if(NOT OPENCV_CHECK_VSX_ASM)
    add_extra_compiler_option_force(-DCV_COMPILER_VSX_BROKEN_ASM)
  endif()
endif()
# combine all "extra" options
if
(
NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS
)
...
...
cmake/checks/cpu_vsx_asm.cpp
0 → 100644
浏览文件 @
f4135968
#if defined(__VSX__)
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
#include <altivec.h>
#else
#error "OpenCV only supports little-endian mode"
#endif
#else
#error "VSX is not supported"
#endif
/*
* xlc and a wide range of clang versions don't support %x<n> in the inline asm template, which fixes the register number
* when using any of the register constraints wa, wd, wf
*/
int
main
()
{
__vector
float
vf
;
__vector
signed
int
vi
;
__asm__
__volatile__
(
"xvcvsxwsp %x0,%x1"
:
"=wf"
(
vf
)
:
"wa"
(
vi
));
return
0
;
}
\ No newline at end of file
modules/core/CMakeLists.txt
浏览文件 @
f4135968
...
...
@@ -3,7 +3,7 @@ set(the_description "The Core Functionality")
# CPU-dispatched sources of the core module: <file> followed by the
# optimization sets it is registered for. Each file is registered exactly
# once; "convert" carries VSX3 in addition to SSE2 and AVX2.
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
ocv_add_dispatched_file(stat SSE4_2 AVX2)
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
ocv_add_dispatched_file(convert SSE2 AVX2 VSX3)
ocv_add_dispatched_file(convert_scale SSE2 AVX2)
ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
ocv_add_dispatched_file(matmul SSE2 AVX2)
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
浏览文件 @
f4135968
...
...
@@ -11,11 +11,6 @@
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
/**
* todo: support half precision for power9
* conversion instructions xvcvhpsp, xvcvsphp
**/
namespace
cv
{
...
...
@@ -1203,20 +1198,62 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
/////// FP16 support ////////
// [TODO] implement these 2 using VSX or universal intrinsics (copy from intrin_sse.cpp and adapt)
/**
 * Load half-precision values from `ptr` and widen them into a v_float32x4.
 *
 * The raw 16-bit values are read with vec_ld_l8 and then converted by one of
 * three paths chosen at compile time:
 *   1. CV_VSX3 with the vec_extract_fp_from_shorth macro available: use it.
 *   2. CV_VSX3 with working inline asm (CV_COMPILER_VSX_BROKEN_ASM not
 *      defined): issue the xvcvhpsp conversion instruction directly.
 *   3. Otherwise: integer bit manipulation on the half-precision encoding.
 *
 * The former element-by-element scalar conversion returned before any of
 * these paths could run, leaving them unreachable; it is removed here.
 *
 * @param ptr source half-precision values
 * @return the widened single-precision vector
 */
inline v_float32x4 v_load_expand(const float16_t* ptr)
{
    vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
#if CV_VSX3 && defined(vec_extract_fp_from_shorth)
    return v_float32x4(vec_extract_fp_from_shorth(vf16));
#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
    vec_float4 vf32;
    // The input is merged with itself before the conversion instruction
    // (presumably to place each half in the lane position the instruction reads).
    __asm__ __volatile__ ("xvcvhpsp %x0,%x1"
                          : "=wf" (vf32)
                          : "wa" (vec_mergeh(vf16, vf16)));
    return v_float32x4(vf32);
#else
    const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
    const vec_int4 signmask = vec_int4_sp(0x80000000);
    const vec_int4 maxexp = vec_int4_sp(0x7c000000);
    const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000));

    // Interleave zeros with the 16-bit inputs to form 32-bit lanes.
    vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16)));
    vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask);
    vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta
    vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf));

    // Lanes whose exponent field equals maxexp get delta added once more.
    t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e)));
    // Lanes with a zero exponent field take the zt value instead of t.
    vec_bint4 zmask = vec_cmpeq(e, z);
    vec_int4 ft = vec_sel(t, zt, zmask);
    return v_float32x4(vec_float4_c(vec_or(ft, sign)));
#endif
}
/**
 * Narrow the single-precision lanes of `v` to half precision and store them
 * at `ptr` with vec_st_l8.
 *
 * Two compile-time paths:
 *   1. CV_VSX3 with working inline asm (CV_COMPILER_VSX_BROKEN_ASM not
 *      defined): issue the xvcvsphp conversion instruction directly.
 *   2. Otherwise: integer bit manipulation on the single-precision encoding.
 *
 * The former scalar round-trip through an aligned temporary wrote ptr[0..3]
 * only for the vector store below to overwrite them; it is removed here.
 *
 * @param ptr destination for the half-precision values
 * @param v   single-precision source vector
 */
inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
{
// fixme: Is there any builtin op or intrinsic that covers "xvcvsphp"?
#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
    vec_ushort8 vf16;
    __asm__ __volatile__ ("xvcvsphp %x0,%x1"
                          : "=wa" (vf16)
                          : "wf" (v.val));
    vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
#else
    const vec_int4 signmask = vec_int4_sp(0x80000000);
    const vec_int4 rval = vec_int4_sp(0x3f000000);

    vec_int4 t = vec_int4_c(v.val);
    // Split off the sign bit (shifted down by 16) and keep the magnitude in t.
    vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16));
    t = vec_and(vec_nor(signmask, signmask), t);

    // Classify each lane by comparing its magnitude bits against thresholds.
    vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t);
    vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000));
    vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan);
    vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t);

    // Candidate result for lanes selected by tinymask.
    vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval)));
    tt = vec_sub(tt, rval);

    // Candidate result for the remaining finite lanes.
    vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1));
    vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
    nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13));

    t = vec_sel(nt, tt, tinymask);
    t = vec_sel(naninf, t, finitemask);
    t = vec_or(t, sign);
    vec_st_l8(vec_packs(t, t), ptr);
#endif
}
/** Intentionally empty: this implementation performs no work. */
inline void v_cleanup()
{
}
...
...
modules/core/include/opencv2/core/vsx_utils.hpp
浏览文件 @
f4135968
...
...
@@ -291,6 +291,8 @@ VSX_IMPL_1RG(vec_udword2, wi, vec_float4, wf, xvcvspuxds, vec_ctulo)
*
* So we're not able to use inline asm and only use built-in functions that CLANG supports
* and use __builtin_convertvector if clang is missing any of the vector conversion built-in functions
*
* todo: clang asm template bug is fixed, need to reconsider the current workarounds.
*/
// convert vector helper
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录