Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
116fb275
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
116fb275
编写于
1月 12, 2015
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
convertTo from 8s
上级
19e77e47
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
234 additions
and
0 deletions
+234
-0
modules/core/src/convert.cpp
modules/core/src/convert.cpp
+234
-0
未找到文件。
modules/core/src/convert.cpp
浏览文件 @
116fb275
...
...
@@ -1571,6 +1571,8 @@ struct cvtScale_SIMD
#if CV_SSE2
// from uchar
template
<
>
struct
cvtScale_SIMD
<
uchar
,
uchar
,
float
>
{
...
...
@@ -1801,6 +1803,238 @@ struct cvtScale_SIMD<uchar, double, float>
}
};
// from schar
// SSE2 specialization: signed 8-bit source -> unsigned 8-bit destination,
// with the scale/shift arithmetic carried out in single-precision float.
template <>
struct cvtScale_SIMD<schar, uchar, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the result narrowed to uchar
    // (signed-saturating 32->16 pack followed by unsigned-saturating 16->8 pack).
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, uchar * dst, int width, float scale, float shift) const
    {
        if (!USE_SSE2)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // float -> int32 -> int16 -> uint8; only the low 8 bytes are stored.
            const __m128i v_packed16 = _mm_packs_epi32(_mm_cvtps_epi32(v_res_lo),
                                                       _mm_cvtps_epi32(v_res_hi));
            const __m128i v_packed8 = _mm_packus_epi16(v_packed16, v_zero);
            _mm_storel_epi64((__m128i *)(dst + x), v_packed8);

            x += 8;
        }

        return x;
    }
};
// SSE2 specialization: signed 8-bit source -> signed 8-bit destination,
// with the scale/shift arithmetic carried out in single-precision float.
template <>
struct cvtScale_SIMD<schar, schar, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the result narrowed to schar
    // (two signed-saturating packs: 32->16, then 16->8).
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, schar * dst, int width, float scale, float shift) const
    {
        if (!USE_SSE2)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // float -> int32 -> int16 -> int8; only the low 8 bytes are stored.
            const __m128i v_packed16 = _mm_packs_epi32(_mm_cvtps_epi32(v_res_lo),
                                                       _mm_cvtps_epi32(v_res_hi));
            const __m128i v_packed8 = _mm_packs_epi16(v_packed16, v_zero);
            _mm_storel_epi64((__m128i *)(dst + x), v_packed8);

            x += 8;
        }

        return x;
    }
};
#if CV_SSE4_1
// SSE4.1 specialization: signed 8-bit source -> unsigned 16-bit destination,
// with the scale/shift arithmetic carried out in single-precision float.
// Uses _mm_packus_epi32, hence the runtime CV_CPU_SSE4_1 check.
template <>
struct cvtScale_SIMD<schar, ushort, float>
{
    // Caches the result of the hardware-support query at construction time.
    cvtScale_SIMD()
        : haveSSE(checkHardwareSupport(CV_CPU_SSE4_1))
    {
    }

    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the result narrowed to
    // ushort with an unsigned-saturating 32->16 pack.
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when haveSSE is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, ushort * dst, int width, float scale, float shift) const
    {
        if (!haveSSE)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // float -> int32 -> uint16; all 8 results fill one 128-bit store.
            const __m128i v_packed16 = _mm_packus_epi32(_mm_cvtps_epi32(v_res_lo),
                                                        _mm_cvtps_epi32(v_res_hi));
            _mm_storeu_si128((__m128i *)(dst + x), v_packed16);

            x += 8;
        }

        return x;
    }

    // True when checkHardwareSupport(CV_CPU_SSE4_1) reported support.
    bool haveSSE;
};
#endif
// SSE2 specialization: signed 8-bit source -> signed 16-bit destination,
// with the scale/shift arithmetic carried out in single-precision float.
template <>
struct cvtScale_SIMD<schar, short, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the result narrowed to
    // short with a signed-saturating 32->16 pack.
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, short * dst, int width, float scale, float shift) const
    {
        if (!USE_SSE2)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // float -> int32 -> int16; all 8 results fill one 128-bit store.
            const __m128i v_packed16 = _mm_packs_epi32(_mm_cvtps_epi32(v_res_lo),
                                                       _mm_cvtps_epi32(v_res_hi));
            _mm_storeu_si128((__m128i *)(dst + x), v_packed16);

            x += 8;
        }

        return x;
    }
};
// SSE2 specialization: signed 8-bit source -> 32-bit integer destination,
// with the scale/shift arithmetic carried out in single-precision float.
template <>
struct cvtScale_SIMD<schar, int, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the float result converted
    // to int32 via _mm_cvtps_epi32 (two stores of 4 ints per iteration).
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, int * dst, int width, float scale, float shift) const
    {
        if (!USE_SSE2)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // Elements 0..3 go to dst[x..x+3], elements 4..7 to dst[x+4..x+7].
            _mm_storeu_si128((__m128i *)(dst + x), _mm_cvtps_epi32(v_res_lo));
            _mm_storeu_si128((__m128i *)(dst + x + 4), _mm_cvtps_epi32(v_res_hi));

            x += 8;
        }

        return x;
    }
};
// SSE2 specialization: signed 8-bit source -> single-precision float
// destination, with the scale/shift arithmetic carried out in float.
template <>
struct cvtScale_SIMD<schar, float, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift for each and storing the float result directly
    // (two unaligned stores of 4 floats per iteration).
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, float * dst, int width, float scale, float shift) const
    {
        if (!USE_SSE2)
            return 0;

        const __m128i v_zero = _mm_setzero_si128();
        const __m128 v_scale = _mm_set1_ps(scale);
        const __m128 v_shift = _mm_set1_ps(shift);

        int x = 0;
        while (x <= width - 8)
        {
            // Load 8 bytes, park each one in the high byte of a 16-bit lane,
            // then arithmetic-shift right to sign-extend to 16 bits.
            const __m128i v_bytes = _mm_loadl_epi64((__m128i const *)(src + x));
            const __m128i v_s16 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_bytes), 8);

            // Same trick again to sign-extend each half to 32-bit lanes.
            const __m128i v_lo32 = _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_s16), 16);
            const __m128i v_hi32 = _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_s16), 16);

            const __m128 v_res_lo = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_lo32), v_scale), v_shift);
            const __m128 v_res_hi = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(v_hi32), v_scale), v_shift);

            // Elements 0..3 go to dst[x..x+3], elements 4..7 to dst[x+4..x+7].
            _mm_storeu_ps(dst + x, v_res_lo);
            _mm_storeu_ps(dst + x + 4, v_res_hi);

            x += 8;
        }

        return x;
    }
};
// SSE2 specialization: signed 8-bit source -> double-precision destination,
// with the scale/shift arithmetic carried out in single-precision float and
// the result widened to double on store.
template <>
struct cvtScale_SIMD<schar, double, float>
{
    // Processes the row in groups of 8 elements, computing
    // src[i] * scale + shift in float and storing each result as a double
    // into dst[i].
    //
    // Returns the index of the first element that was NOT processed:
    // 0 when USE_SSE2 is false, otherwise the largest multiple of 8 that is
    // <= width (or 0 if width < 8).
    // NOTE(review): the caller presumably handles the remaining tail - confirm.
    int operator () (const schar * src, double * dst, int width, float scale, float shift) const
    {
        int x = 0;

        if (!USE_SSE2)
            return x;

        __m128i v_zero = _mm_setzero_si128();
        __m128 v_scale = _mm_set1_ps(scale), v_shift = _mm_set1_ps(shift);

        for ( ; x <= width - 8; x += 8)
        {
            // Sign-extend 8 bytes to 16-bit lanes: place each byte in the high
            // half of a lane, then arithmetic-shift right by 8.
            __m128i v_src = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, _mm_loadl_epi64((__m128i const *)(src + x))), 8);

            // Sign-extend each half to 32-bit lanes the same way and convert to float.
            __m128 v_src_f = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src), 16));
            __m128 v_dst_0 = _mm_add_ps(_mm_mul_ps(v_src_f, v_scale), v_shift);

            v_src_f = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src), 16));
            __m128 v_dst_1 = _mm_add_ps(_mm_mul_ps(v_src_f, v_scale), v_shift);

            // _mm_cvtps_pd widens only the two LOW floats, and each store
            // writes two doubles. To reach the upper pair, shift the register
            // right by 8 bytes (two floats). A 16-byte shift would clear the
            // whole register. Destination offsets therefore advance by 2:
            // elements 0-1, 2-3, 4-5, 6-7.
            _mm_storeu_pd(dst + x, _mm_cvtps_pd(v_dst_0));
            _mm_storeu_pd(dst + x + 2, _mm_cvtps_pd(_mm_castsi128_ps(
                _mm_srli_si128(_mm_castps_si128(v_dst_0), 8))));

            _mm_storeu_pd(dst + x + 4, _mm_cvtps_pd(v_dst_1));
            _mm_storeu_pd(dst + x + 6, _mm_cvtps_pd(_mm_castsi128_ps(
                _mm_srli_si128(_mm_castps_si128(v_dst_1), 8))));
        }

        return x;
    }
};
#elif CV_NEON
// from uchar
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录