Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
5ca25ab8
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5ca25ab8
编写于
10月 12, 2014
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cv::pow (integer power)
上级
ccdc7128
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
246 additions
and
6 deletions
+246
-6
modules/core/include/opencv2/core/base.hpp
modules/core/include/opencv2/core/base.hpp
+14
-4
modules/core/src/mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+232
-2
未找到文件。
modules/core/include/opencv2/core/base.hpp
浏览文件 @
5ca25ab8
...
...
@@ -621,20 +621,30 @@ inline float32x2_t cv_vrecp_f32(float32x2_t val)
return
reciprocal
;
}
inline
float32x4_t
cv_vsqrtq_f32
(
float32x4_t
val
)
inline
float32x4_t
cv_v
r
sqrtq_f32
(
float32x4_t
val
)
{
float32x4_t
e
=
vrsqrteq_f32
(
val
);
e
=
vmulq_f32
(
vrsqrtsq_f32
(
vmulq_f32
(
e
,
e
),
val
),
e
);
e
=
vmulq_f32
(
vrsqrtsq_f32
(
vmulq_f32
(
e
,
e
),
val
),
e
);
return
cv_vrecpq_f32
(
e
)
;
return
e
;
}
inline
float32x2_t
cv_vsqrt_f32
(
float32x2_t
val
)
inline
float32x2_t
cv_v
r
sqrt_f32
(
float32x2_t
val
)
{
float32x2_t
e
=
vrsqrte_f32
(
val
);
e
=
vmul_f32
(
vrsqrts_f32
(
vmul_f32
(
e
,
e
),
val
),
e
);
e
=
vmul_f32
(
vrsqrts_f32
(
vmul_f32
(
e
,
e
),
val
),
e
);
return
cv_vrecp_f32
(
e
);
return
e
;
}
// Four-lane helper: feeds val through cv_vrsqrtq_f32 and then passes that
// result to cv_vrecpq_f32. Going by the helper names this evaluates
// sqrt(val) as 1 / (1 / sqrt(val)); cv_vrecpq_f32 is defined outside this
// view, so its exact precision is not confirmed here.
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
{
    const float32x4_t v_inv_root = cv_vrsqrtq_f32(val);
    return cv_vrecpq_f32(v_inv_root);
}
// Two-lane helper: feeds val through cv_vrsqrt_f32 and then passes that
// result to cv_vrecp_f32. Going by the helper names this evaluates
// sqrt(val) as 1 / (1 / sqrt(val)); the body of cv_vrecp_f32 is not fully
// visible here, so its exact precision is not confirmed.
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
{
    const float32x2_t v_inv_root = cv_vrsqrt_f32(val);
    return cv_vrecp_f32(v_inv_root);
}
#endif
...
...
modules/core/src/mathfuncs.cpp
浏览文件 @
5ca25ab8
...
...
@@ -393,6 +393,12 @@ static void InvSqrt_32f(const float* src, float* dst, int len)
_mm_storeu_ps
(
dst
+
i
,
t0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
t1
);
}
}
#elif CV_NEON
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
vst1q_f32
(
dst
+
i
,
cv_vrsqrtq_f32
(
vld1q_f32
(
src
+
i
)));
vst1q_f32
(
dst
+
i
+
4
,
cv_vrsqrtq_f32
(
vld1q_f32
(
src
+
i
+
4
)));
}
#endif
for
(
;
i
<
len
;
i
++
)
...
...
@@ -451,6 +457,12 @@ static void Sqrt_32f(const float* src, float* dst, int len)
_mm_storeu_ps
(
dst
+
i
,
t0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
t1
);
}
}
#elif CV_NEON
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
vst1q_f32
(
dst
+
i
,
cv_vsqrtq_f32
(
vld1q_f32
(
src
+
i
)));
vst1q_f32
(
dst
+
i
+
4
,
cv_vsqrtq_f32
(
vld1q_f32
(
src
+
i
+
4
)));
}
#endif
for
(
;
i
<
len
;
i
++
)
...
...
@@ -2157,12 +2169,230 @@ void log( InputArray _src, OutputArray _dst )
* P O W E R *
\****************************************************************************************/
// Generic (no-op) vectorized integer-power kernel.
//
// The call operator takes (src, dst, len, power), ignores all of them and
// reports that zero elements were processed. iPow_ uses the returned value
// as the index at which its scalar loop starts, so with this fallback the
// whole array is handled by the scalar code.
template <typename T, typename WT>
struct iPow_SIMD
{
    int operator()(const T* /*src*/, T* /*dst*/, int /*len*/, int /*power*/)
    {
        const int processed = 0;
        return processed;
    }
};
#if CV_NEON
template
<
>
struct
iPow_SIMD
<
uchar
,
int
>
{
int
operator
()
(
const
uchar
*
src
,
uchar
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
uint32x4_t
v_1
=
vdupq_n_u32
(
1u
);
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
uint32x4_t
v_a1
=
v_1
,
v_a2
=
v_1
;
uint16x8_t
v_src
=
vmovl_u8
(
vld1_u8
(
src
+
i
));
uint32x4_t
v_b1
=
vmovl_u16
(
vget_low_u16
(
v_src
)),
v_b2
=
vmovl_u16
(
vget_high_u16
(
v_src
));
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
{
v_a1
=
vmulq_u32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_u32
(
v_a2
,
v_b2
);
}
v_b1
=
vmulq_u32
(
v_b1
,
v_b1
);
v_b2
=
vmulq_u32
(
v_b2
,
v_b2
);
p
>>=
1
;
}
v_a1
=
vmulq_u32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_u32
(
v_a2
,
v_b2
);
vst1_u8
(
dst
+
i
,
vqmovn_u16
(
vcombine_u16
(
vqmovn_u32
(
v_a1
),
vqmovn_u32
(
v_a2
))));
}
return
i
;
}
};
template
<
>
struct
iPow_SIMD
<
schar
,
int
>
{
int
operator
()
(
const
schar
*
src
,
schar
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
int32x4_t
v_1
=
vdupq_n_s32
(
1
);
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
int32x4_t
v_a1
=
v_1
,
v_a2
=
v_1
;
int16x8_t
v_src
=
vmovl_s8
(
vld1_s8
(
src
+
i
));
int32x4_t
v_b1
=
vmovl_s16
(
vget_low_s16
(
v_src
)),
v_b2
=
vmovl_s16
(
vget_high_s16
(
v_src
));
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
{
v_a1
=
vmulq_s32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_s32
(
v_a2
,
v_b2
);
}
v_b1
=
vmulq_s32
(
v_b1
,
v_b1
);
v_b2
=
vmulq_s32
(
v_b2
,
v_b2
);
p
>>=
1
;
}
v_a1
=
vmulq_s32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_s32
(
v_a2
,
v_b2
);
vst1_s8
(
dst
+
i
,
vqmovn_s16
(
vcombine_s16
(
vqmovn_s32
(
v_a1
),
vqmovn_s32
(
v_a2
))));
}
return
i
;
}
};
template
<
>
struct
iPow_SIMD
<
ushort
,
int
>
{
int
operator
()
(
const
ushort
*
src
,
ushort
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
uint32x4_t
v_1
=
vdupq_n_u32
(
1u
);
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
uint32x4_t
v_a1
=
v_1
,
v_a2
=
v_1
;
uint16x8_t
v_src
=
vld1q_u16
(
src
+
i
);
uint32x4_t
v_b1
=
vmovl_u16
(
vget_low_u16
(
v_src
)),
v_b2
=
vmovl_u16
(
vget_high_u16
(
v_src
));
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
{
v_a1
=
vmulq_u32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_u32
(
v_a2
,
v_b2
);
}
v_b1
=
vmulq_u32
(
v_b1
,
v_b1
);
v_b2
=
vmulq_u32
(
v_b2
,
v_b2
);
p
>>=
1
;
}
v_a1
=
vmulq_u32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_u32
(
v_a2
,
v_b2
);
vst1q_u16
(
dst
+
i
,
vcombine_u16
(
vqmovn_u32
(
v_a1
),
vqmovn_u32
(
v_a2
)));
}
return
i
;
}
};
template
<
>
struct
iPow_SIMD
<
short
,
int
>
{
int
operator
()
(
const
short
*
src
,
short
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
int32x4_t
v_1
=
vdupq_n_s32
(
1
);
for
(
;
i
<=
len
-
8
;
i
+=
8
)
{
int32x4_t
v_a1
=
v_1
,
v_a2
=
v_1
;
int16x8_t
v_src
=
vld1q_s16
(
src
+
i
);
int32x4_t
v_b1
=
vmovl_s16
(
vget_low_s16
(
v_src
)),
v_b2
=
vmovl_s16
(
vget_high_s16
(
v_src
));
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
{
v_a1
=
vmulq_s32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_s32
(
v_a2
,
v_b2
);
}
v_b1
=
vmulq_s32
(
v_b1
,
v_b1
);
v_b2
=
vmulq_s32
(
v_b2
,
v_b2
);
p
>>=
1
;
}
v_a1
=
vmulq_s32
(
v_a1
,
v_b1
);
v_a2
=
vmulq_s32
(
v_a2
,
v_b2
);
vst1q_s16
(
dst
+
i
,
vcombine_s16
(
vqmovn_s32
(
v_a1
),
vqmovn_s32
(
v_a2
)));
}
return
i
;
}
};
template
<
>
struct
iPow_SIMD
<
int
,
int
>
{
int
operator
()
(
const
int
*
src
,
int
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
int32x4_t
v_1
=
vdupq_n_s32
(
1
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
{
int32x4_t
v_b
=
vld1q_s32
(
src
+
i
),
v_a
=
v_1
;
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
v_a
=
vmulq_s32
(
v_a
,
v_b
);
v_b
=
vmulq_s32
(
v_b
,
v_b
);
p
>>=
1
;
}
v_a
=
vmulq_s32
(
v_a
,
v_b
);
vst1q_s32
(
dst
+
i
,
v_a
);
}
return
i
;
}
};
template
<
>
struct
iPow_SIMD
<
float
,
float
>
{
int
operator
()
(
const
float
*
src
,
float
*
dst
,
int
len
,
int
power
)
{
int
i
=
0
;
float32x4_t
v_1
=
vdupq_n_f32
(
1.0
f
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
{
float32x4_t
v_b
=
vld1q_f32
(
src
+
i
),
v_a
=
v_1
;
int
p
=
power
;
while
(
p
>
1
)
{
if
(
p
&
1
)
v_a
=
vmulq_f32
(
v_a
,
v_b
);
v_b
=
vmulq_f32
(
v_b
,
v_b
);
p
>>=
1
;
}
v_a
=
vmulq_f32
(
v_a
,
v_b
);
vst1q_f32
(
dst
+
i
,
v_a
);
}
return
i
;
}
};
#endif
template
<
typename
T
,
typename
WT
>
static
void
iPow_
(
const
T
*
src
,
T
*
dst
,
int
len
,
int
power
)
{
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
iPow_SIMD
<
T
,
WT
>
vop
;
int
i
=
vop
(
src
,
dst
,
len
,
power
);
for
(
;
i
<
len
;
i
++
)
{
WT
a
=
1
,
b
=
src
[
i
];
int
p
=
power
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录