Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
9ae8443d
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
9ae8443d
编写于
10月 07, 2011
作者:
A
Andrey Kamaev
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Tegra optimized FastAtan2
上级
78bd2133
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
45 addition
and
40 deletion
+45
-40
modules/core/src/mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+45
-40
未找到文件。
modules/core/src/mathfuncs.cpp
浏览文件 @
9ae8443d
...
...
@@ -63,60 +63,65 @@ float fastAtan2( float y, float x )
static
void
FastAtan2_32f
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
=
true
)
{
int
i
=
0
;
float
scale
=
angleInDegrees
?
(
float
)(
180
/
CV_PI
)
:
1.
f
;
int
i
=
0
;
float
scale
=
angleInDegrees
?
(
float
)(
180
/
CV_PI
)
:
1.
f
;
#ifdef HAVE_TEGRA_OPTIMIZATION
if
(
tegra
::
FastAtan2_32f
(
Y
,
X
,
angle
,
len
,
scale
))
return
;
#endif
#if CV_SSE2
if
(
USE_SSE2
)
{
Cv32suf
iabsmask
;
iabsmask
.
i
=
0x7fffffff
;
__m128
eps
=
_mm_set1_ps
((
float
)
DBL_EPSILON
),
absmask
=
_mm_set1_ps
(
iabsmask
.
f
);
__m128
_90
=
_mm_set1_ps
((
float
)(
CV_PI
*
0.5
)),
_180
=
_mm_set1_ps
((
float
)
CV_PI
),
_360
=
_mm_set1_ps
((
float
)(
CV_PI
*
2
));
__m128
zero
=
_mm_setzero_ps
(),
scale4
=
_mm_set1_ps
(
scale
);
__m128
p0
=
_mm_set1_ps
(
0.43157974
f
),
q0
=
_mm_set1_ps
(
0.76443945
f
),
q1
=
_mm_set1_ps
(
0.05831938
f
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
if
(
USE_SSE2
)
{
__m128
x4
=
_mm_loadu_ps
(
X
+
i
),
y4
=
_mm_loadu_ps
(
Y
+
i
);
__m128
xq4
=
_mm_mul_ps
(
x4
,
x4
),
yq4
=
_mm_mul_ps
(
y4
,
y4
);
__m128
xly
=
_mm_cmplt_ps
(
xq4
,
yq4
);
__m128
t
=
_mm_min_ps
(
xq4
,
yq4
);
xq4
=
_mm_max_ps
(
xq4
,
yq4
);
yq4
=
t
;
__m128
z4
=
_mm_div_ps
(
_mm_mul_ps
(
_mm_mul_ps
(
x4
,
y4
),
_mm_add_ps
(
xq4
,
_mm_mul_ps
(
yq4
,
p0
))),
_mm_add_ps
(
eps
,
_mm_add_ps
(
_mm_mul_ps
(
xq4
,
xq4
),
_mm_mul_ps
(
yq4
,
_mm_add_ps
(
_mm_mul_ps
(
xq4
,
q0
),
_mm_mul_ps
(
yq4
,
q1
))))));
// a4 <- x < y ? 90 : 0;
__m128
a4
=
_mm_and_ps
(
xly
,
_90
);
// a4 <- (y < 0 ? 360 - a4 : a4) == ((x < y ? y < 0 ? 270 : 90) : (y < 0 ? 360 : 0))
__m128
mask
=
_mm_cmplt_ps
(
y4
,
zero
);
a4
=
_mm_or_ps
(
_mm_and_ps
(
_mm_sub_ps
(
_360
,
a4
),
mask
),
_mm_andnot_ps
(
mask
,
a4
));
// a4 <- (x < 0 && !(x < y) ? 180 : a4)
mask
=
_mm_andnot_ps
(
xly
,
_mm_cmplt_ps
(
x4
,
zero
));
a4
=
_mm_or_ps
(
_mm_and_ps
(
_180
,
mask
),
_mm_andnot_ps
(
mask
,
a4
));
// a4 <- (x < y ? a4 - z4 : a4 + z4)
a4
=
_mm_mul_ps
(
_mm_add_ps
(
_mm_xor_ps
(
z4
,
_mm_andnot_ps
(
absmask
,
xly
)),
a4
),
scale4
);
_mm_storeu_ps
(
angle
+
i
,
a4
);
Cv32suf
iabsmask
;
iabsmask
.
i
=
0x7fffffff
;
__m128
eps
=
_mm_set1_ps
((
float
)
DBL_EPSILON
),
absmask
=
_mm_set1_ps
(
iabsmask
.
f
);
__m128
_90
=
_mm_set1_ps
((
float
)(
CV_PI
*
0.5
)),
_180
=
_mm_set1_ps
((
float
)
CV_PI
),
_360
=
_mm_set1_ps
((
float
)(
CV_PI
*
2
));
__m128
zero
=
_mm_setzero_ps
(),
scale4
=
_mm_set1_ps
(
scale
);
__m128
p0
=
_mm_set1_ps
(
0.43157974
f
),
q0
=
_mm_set1_ps
(
0.76443945
f
),
q1
=
_mm_set1_ps
(
0.05831938
f
);
for
(
;
i
<=
len
-
4
;
i
+=
4
)
{
__m128
x4
=
_mm_loadu_ps
(
X
+
i
),
y4
=
_mm_loadu_ps
(
Y
+
i
);
__m128
xq4
=
_mm_mul_ps
(
x4
,
x4
),
yq4
=
_mm_mul_ps
(
y4
,
y4
);
__m128
xly
=
_mm_cmplt_ps
(
xq4
,
yq4
);
__m128
t
=
_mm_min_ps
(
xq4
,
yq4
);
xq4
=
_mm_max_ps
(
xq4
,
yq4
);
yq4
=
t
;
__m128
z4
=
_mm_div_ps
(
_mm_mul_ps
(
_mm_mul_ps
(
x4
,
y4
),
_mm_add_ps
(
xq4
,
_mm_mul_ps
(
yq4
,
p0
))),
_mm_add_ps
(
eps
,
_mm_add_ps
(
_mm_mul_ps
(
xq4
,
xq4
),
_mm_mul_ps
(
yq4
,
_mm_add_ps
(
_mm_mul_ps
(
xq4
,
q0
),
_mm_mul_ps
(
yq4
,
q1
))))));
// a4 <- x < y ? 90 : 0;
__m128
a4
=
_mm_and_ps
(
xly
,
_90
);
// a4 <- (y < 0 ? 360 - a4 : a4) == ((x < y ? y < 0 ? 270 : 90) : (y < 0 ? 360 : 0))
__m128
mask
=
_mm_cmplt_ps
(
y4
,
zero
);
a4
=
_mm_or_ps
(
_mm_and_ps
(
_mm_sub_ps
(
_360
,
a4
),
mask
),
_mm_andnot_ps
(
mask
,
a4
));
// a4 <- (x < 0 && !(x < y) ? 180 : a4)
mask
=
_mm_andnot_ps
(
xly
,
_mm_cmplt_ps
(
x4
,
zero
));
a4
=
_mm_or_ps
(
_mm_and_ps
(
_180
,
mask
),
_mm_andnot_ps
(
mask
,
a4
));
// a4 <- (x < y ? a4 - z4 : a4 + z4)
a4
=
_mm_mul_ps
(
_mm_add_ps
(
_mm_xor_ps
(
z4
,
_mm_andnot_ps
(
absmask
,
xly
)),
a4
),
scale4
);
_mm_storeu_ps
(
angle
+
i
,
a4
);
}
}
}
#endif
for
(
;
i
<
len
;
i
++
)
{
{
double
x
=
X
[
i
],
y
=
Y
[
i
],
x2
=
x
*
x
,
y2
=
y
*
y
,
a
;
if
(
y2
<=
x2
)
a
=
(
x
<
0
?
CV_PI
:
y
>=
0
?
0
:
CV_PI
*
2
)
+
x
*
y
*
(
x2
+
0.43157974
*
y2
)
/
(
x2
*
x2
+
y2
*
(
0.76443945
*
x2
+
0.05831938
*
y2
)
+
(
float
)
DBL_EPSILON
);
x
*
y
*
(
x2
+
0.43157974
*
y2
)
/
(
x2
*
x2
+
y2
*
(
0.76443945
*
x2
+
0.05831938
*
y2
)
+
(
float
)
DBL_EPSILON
);
else
{
a
=
(
y
>=
0
?
CV_PI
*
0.5
:
CV_PI
*
1.5
)
-
x
*
y
*
(
y2
+
0.43157974
*
x2
)
/
(
y2
*
y2
+
x2
*
(
0.76443945
*
y2
+
0.05831938
*
x2
)
+
(
float
)
DBL_EPSILON
);
x
*
y
*
(
y2
+
0.43157974
*
x2
)
/
(
y2
*
y2
+
x2
*
(
0.76443945
*
y2
+
0.05831938
*
x2
)
+
(
float
)
DBL_EPSILON
);
}
angle
[
i
]
=
(
float
)(
a
*
scale
);
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录