Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
b782d8bb
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b782d8bb
编写于
7月 24, 2012
作者:
V
Vadim Pisarevsky
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
integrated patch with some SSE2/SSE4.2 optimizations from Grigory Frolov
上级
54d68da8
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
220 addition
and
10 deletion
+220
-10
modules/core/include/opencv2/core/internal.hpp
modules/core/include/opencv2/core/internal.hpp
+18
-3
modules/core/src/convert.cpp
modules/core/src/convert.cpp
+57
-1
modules/core/src/copy.cpp
modules/core/src/copy.cpp
+61
-1
modules/core/src/lapack.cpp
modules/core/src/lapack.cpp
+48
-3
modules/core/src/precomp.hpp
modules/core/src/precomp.hpp
+2
-0
modules/core/src/stat.cpp
modules/core/src/stat.cpp
+31
-1
modules/core/src/system.cpp
modules/core/src/system.cpp
+3
-1
未找到文件。
modules/core/include/opencv2/core/internal.hpp
浏览文件 @
b782d8bb
...
...
@@ -120,12 +120,27 @@ CV_INLINE IppiSize ippiSize(int width, int height)
# else
# define CV_SSSE3 0
# endif
#else
# if defined __SSE4_1__ || _MSC_VER >= 1600
# include <smmintrin.h>
# define CV_SSE4_1 1
# endif
# if defined __SSE4_2__ || _MSC_VER >= 1600
# include <nmmintrin.h>
# define CV_SSE4_2 1
# endif
# if defined __AVX__ || _MSC_VER >= 1600
# include <immintrin.h>
# define CV_AVX 1
# endif
# else
# define CV_SSE 0
# define CV_SSE2 0
# define CV_SSE3 0
# define CV_SSSE3 0
#endif
# define CV_SSE4_1 0
# define CV_SSE4_2 0
# define CV_AVX 0
# endif
#if defined ANDROID && defined __ARM_NEON__
# include "arm_neon.h"
...
...
@@ -764,4 +779,4 @@ CV_EXPORTS bool icvCheckGlError(const char* file, const int line, const char* fu
#endif //__cplusplus
#endif // __OPENCV_CORE_INTERNAL_HPP__
#endif // __OPENCV_CORE_INTERNAL_HPP__
\ No newline at end of file
modules/core/src/convert.cpp
浏览文件 @
b782d8bb
...
...
@@ -657,6 +657,62 @@ cvtScale_<short, short, float>( const short* src, size_t sstep,
}
}
template
<
>
void
cvtScale_
<
short
,
int
,
float
>
(
const
short
*
src
,
size_t
sstep
,
int
*
dst
,
size_t
dstep
,
Size
size
,
float
scale
,
float
shift
)
{
sstep
/=
sizeof
(
src
[
0
]);
dstep
/=
sizeof
(
dst
[
0
]);
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
int
x
=
0
;
#if CV_SSE2
if
(
USE_SSE2
)
//~5X
{
__m128
scale128
=
_mm_set1_ps
(
scale
);
__m128
shift128
=
_mm_set1_ps
(
shift
);
for
(;
x
<=
size
.
width
-
8
;
x
+=
8
)
{
__m128i
r0
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
));
__m128i
r1
=
_mm_loadl_epi64
((
const
__m128i
*
)(
src
+
x
+
4
));
__m128
rf0
=
_mm_cvtepi32_ps
(
_mm_srai_epi32
(
_mm_unpacklo_epi16
(
r0
,
r0
),
16
));
__m128
rf1
=
_mm_cvtepi32_ps
(
_mm_srai_epi32
(
_mm_unpacklo_epi16
(
r1
,
r1
),
16
));
rf0
=
_mm_add_ps
(
_mm_mul_ps
(
rf0
,
scale128
),
shift128
);
rf1
=
_mm_add_ps
(
_mm_mul_ps
(
rf1
,
scale128
),
shift128
);
r0
=
_mm_cvtps_epi32
(
rf0
);
r1
=
_mm_cvtps_epi32
(
rf1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
r0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
+
4
),
r1
);
}
}
#endif
//We will wait Haswell
/*
#if CV_AVX
if(USE_AVX)//2X - bad variant
{
////TODO:AVX implementation (optimization?) required
__m256 scale256 = _mm256_set1_ps (scale);
__m256 shift256 = _mm256_set1_ps (shift);
for(; x <= size.width - 8; x += 8 )
{
__m256i buf = _mm256_set_epi32((int)(*(src+x+7)),(int)(*(src+x+6)),(int)(*(src+x+5)),(int)(*(src+x+4)),(int)(*(src+x+3)),(int)(*(src+x+2)),(int)(*(src+x+1)),(int)(*(src+x)));
__m256 r0 = _mm256_add_ps( _mm256_mul_ps(_mm256_cvtepi32_ps (buf), scale256), shift256);
__m256i res = _mm256_cvtps_epi32(r0);
_mm256_storeu_si256 ((__m256i*)(dst+x), res);
}
}
#endif*/
for
(;
x
<
size
.
width
;
x
++
)
dst
[
x
]
=
saturate_cast
<
int
>
(
src
[
x
]
*
scale
+
shift
);
}
}
template
<
typename
T
,
typename
DT
>
static
void
cvt_
(
const
T
*
src
,
size_t
sstep
,
...
...
@@ -1305,4 +1361,4 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr,
cv
::
normalize
(
src
,
dst
,
a
,
b
,
norm_type
,
dst
.
type
(),
mask
);
}
/* End of file. */
/* End of file. */
\ No newline at end of file
modules/core/src/copy.cpp
浏览文件 @
b782d8bb
...
...
@@ -78,6 +78,66 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, ucha
}
}
template
<
>
static
void
copyMask_
<
uchar
>
(
const
uchar
*
_src
,
size_t
sstep
,
const
uchar
*
mask
,
size_t
mstep
,
uchar
*
_dst
,
size_t
dstep
,
Size
size
)
{
for
(
;
size
.
height
--
;
mask
+=
mstep
,
_src
+=
sstep
,
_dst
+=
dstep
)
{
const
uchar
*
src
=
(
const
uchar
*
)
_src
;
uchar
*
dst
=
(
uchar
*
)
_dst
;
int
x
=
0
;
#if CV_SSE4_2
if
(
USE_SSE4_2
)
//
{
__m128i
zero
=
_mm_setzero_si128
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
const
__m128i
rSrc
=
_mm_lddqu_si128
((
const
__m128i
*
)(
src
+
x
));
__m128i
_mask
=
_mm_lddqu_si128
((
const
__m128i
*
)(
mask
+
x
));
__m128i
rDst
=
_mm_lddqu_si128
((
__m128i
*
)(
dst
+
x
));
__m128i
_negMask
=
_mm_cmpeq_epi8
(
_mask
,
zero
);
rDst
=
_mm_blendv_epi8
(
rSrc
,
rDst
,
_negMask
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
rDst
);
}
}
#endif
for
(
;
x
<
size
.
width
;
x
++
)
if
(
mask
[
x
]
)
dst
[
x
]
=
src
[
x
];
}
}
template
<
>
static
void
copyMask_
<
ushort
>
(
const
uchar
*
_src
,
size_t
sstep
,
const
uchar
*
mask
,
size_t
mstep
,
uchar
*
_dst
,
size_t
dstep
,
Size
size
)
{
for
(
;
size
.
height
--
;
mask
+=
mstep
,
_src
+=
sstep
,
_dst
+=
dstep
)
{
const
ushort
*
src
=
(
const
ushort
*
)
_src
;
ushort
*
dst
=
(
ushort
*
)
_dst
;
int
x
=
0
;
#if CV_SSE4_2
if
(
USE_SSE4_2
)
//
{
__m128i
zero
=
_mm_setzero_si128
();
for
(
;
x
<=
size
.
width
-
8
;
x
+=
8
)
{
const
__m128i
rSrc
=
_mm_lddqu_si128
((
const
__m128i
*
)(
src
+
x
));
__m128i
_mask
=
_mm_loadl_epi64
((
const
__m128i
*
)(
mask
+
x
));
_mask
=
_mm_unpacklo_epi8
(
_mask
,
_mask
);
__m128i
rDst
=
_mm_lddqu_si128
((
const
__m128i
*
)(
dst
+
x
));
__m128i
_negMask
=
_mm_cmpeq_epi8
(
_mask
,
zero
);
rDst
=
_mm_blendv_epi8
(
rSrc
,
rDst
,
_negMask
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
rDst
);
}
}
#endif
for
(
;
x
<
size
.
width
;
x
++
)
if
(
mask
[
x
]
)
dst
[
x
]
=
src
[
x
];
}
}
static
void
copyMaskGeneric
(
const
uchar
*
_src
,
size_t
sstep
,
const
uchar
*
mask
,
size_t
mstep
,
uchar
*
_dst
,
size_t
dstep
,
Size
size
,
void
*
_esz
)
{
...
...
@@ -567,4 +627,4 @@ cvRepeat( const CvArr* srcarr, CvArr* dstarr )
cv
::
repeat
(
src
,
dst
.
rows
/
src
.
rows
,
dst
.
cols
/
src
.
cols
,
dst
);
}
/* End of file. */
/* End of file. */
\ No newline at end of file
modules/core/src/lapack.cpp
浏览文件 @
b782d8bb
...
...
@@ -1010,6 +1010,25 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
if
(
type
==
CV_32FC1
)
{
double
d
=
det2
(
Sf
);
#if CV_SSE4_2
if
(
USE_SSE4_2
)
{
__m128
zero
=
_mm_setzero_ps
();
__m128
t0
=
_mm_loadl_pi
(
zero
,
(
const
__m64
*
)
srcdata
);
//t0 = sf(0,0) sf(0,1)
__m128
t1
=
_mm_loadh_pi
(
zero
,(
const
__m64
*
)((
const
float
*
)(
srcdata
+
srcstep
)));
//t1 = sf(1,0) sf(1,1)
__m128
s0
=
_mm_blend_ps
(
t0
,
t1
,
12
);
d
=
1.
/
d
;
result
=
true
;
__m128
det
=
_mm_set1_ps
((
float
)
d
);
s0
=
_mm_mul_ps
(
s0
,
det
);
const
uchar
CV_DECL_ALIGNED
(
16
)
inv
[
16
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0x80
,
0
,
0
,
0
,
0x80
,
0
,
0
,
0
,
0
};
__m128
pattern
=
_mm_load_ps
((
const
float
*
)
inv
);
s0
=
_mm_xor_ps
(
s0
,
pattern
);
//==-1*s0
s0
=
_mm_shuffle_ps
(
s0
,
s0
,
_MM_SHUFFLE
(
0
,
2
,
1
,
3
));
_mm_storel_pi
((
__m64
*
)
dstdata
,
s0
);
_mm_storeh_pi
((
__m64
*
)((
float
*
)(
dstdata
+
dststep
)),
s0
);
}
#else
if
(
d
!=
0.
)
{
double
t0
,
t1
;
...
...
@@ -1022,12 +1041,36 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
t0
=
-
Sf
(
0
,
1
)
*
d
;
t1
=
-
Sf
(
1
,
0
)
*
d
;
Df
(
0
,
1
)
=
(
float
)
t0
;
Df
(
1
,
0
)
=
(
float
)
t1
;
Df
(
1
,
0
)
=
(
float
)
t1
;
}
#endif
}
else
{
double
d
=
det2
(
Sd
);
double
d
=
det2
(
Sd
);
#if CV_SSE2
if
(
USE_SSE2
)
{
__m128d
s0
=
_mm_loadu_pd
((
const
double
*
)
srcdata
);
//s0 = sf(0,0) sf(0,1)
__m128d
s1
=
_mm_loadu_pd
((
const
double
*
)(
srcdata
+
srcstep
));
//s1 = sf(1,0) sf(1,1)
__m128d
sm
=
_mm_shuffle_pd
(
s0
,
s1
,
_MM_SHUFFLE2
(
1
,
0
));
//sm = sf(0,0) sf(1,1) - main diagonal
__m128d
ss
=
_mm_shuffle_pd
(
s0
,
s1
,
_MM_SHUFFLE2
(
0
,
1
));
//sm = sf(0,1) sf(1,0) - secondary diagonal
result
=
true
;
d
=
1.
/
d
;
__m128d
det
=
_mm_load1_pd
((
const
double
*
)
&
d
);
sm
=
_mm_mul_pd
(
sm
,
det
);
//__m128d pattern = _mm_set1_pd(-1.);
static
const
uchar
CV_DECL_ALIGNED
(
16
)
inv
[
8
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0x80
};
__m128d
pattern
=
_mm_load1_pd
((
double
*
)
inv
);
ss
=
_mm_mul_pd
(
ss
,
det
);
ss
=
_mm_xor_pd
(
ss
,
pattern
);
//==-1*ss
//ss = _mm_mul_pd(ss,pattern);
s0
=
_mm_shuffle_pd
(
sm
,
ss
,
_MM_SHUFFLE2
(
0
,
1
));
s1
=
_mm_shuffle_pd
(
ss
,
sm
,
_MM_SHUFFLE2
(
0
,
1
));
_mm_store_pd
((
double
*
)
dstdata
,
s0
);
_mm_store_pd
((
double
*
)(
dstdata
+
dststep
),
s1
);
}
#else
if
(
d
!=
0.
)
{
double
t0
,
t1
;
...
...
@@ -1042,6 +1085,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
Dd
(
0
,
1
)
=
t0
;
Dd
(
1
,
0
)
=
t1
;
}
#endif
}
}
else
if
(
n
==
3
)
...
...
@@ -1148,6 +1192,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
return
result
;
}
/****************************************************************************************\
* Solving a linear system *
\****************************************************************************************/
...
...
@@ -1797,4 +1842,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr,
cv
::
SVD
::
backSubst
(
w
,
u
,
v
,
rhs
,
dst
);
CV_Assert
(
dst
.
data
==
dst0
.
data
);
}
}
\ No newline at end of file
modules/core/src/precomp.hpp
浏览文件 @
b782d8bb
...
...
@@ -170,6 +170,8 @@ struct NoVec
};
extern
volatile
bool
USE_SSE2
;
extern
volatile
bool
USE_SSE4_2
;
extern
volatile
bool
USE_AVX
;
enum
{
BLOCK_SIZE
=
1024
};
...
...
modules/core/src/stat.cpp
浏览文件 @
b782d8bb
...
...
@@ -221,6 +221,36 @@ static int countNonZero_(const T* src, int len )
return
nz
;
}
template
<
>
int
countNonZero_
<
uchar
>
(
const
uchar
*
src
,
int
len
)
{
int
i
=
0
,
nz
=
0
;
#if CV_SSE4_2
if
(
USE_SSE4_2
)
//5x-6x
{
__m128i
pattern
=
_mm_setzero_si128
();
__m128i
inv
=
_mm_set1_epi8
((
char
)
1
);
__int64
CV_DECL_ALIGNED
(
16
)
buf
[
2
];
for
(;
i
<=
len
-
16
;
i
+=
16
)
{
__m128i
r0
=
_mm_lddqu_si128
((
const
__m128i
*
)(
src
+
i
));
__m128i
res
=
_mm_cmpeq_epi8
(
r0
,
pattern
);
res
=
_mm_add_epi8
(
res
,
inv
);
//11111111+1=00000000, 00000000+1=00000001
_mm_store_si128
((
__m128i
*
)
buf
,
res
);
__int64
countLow
=
_mm_popcnt_u64
(
buf
[
0
]);
nz
+=
countLow
;
__int64
countHigh
=
_mm_popcnt_u64
(
buf
[
1
]);
nz
+=
countHigh
;
}
}
#endif
for
(
;
i
<
len
;
i
++
)
nz
+=
src
[
i
]
!=
0
;
return
nz
;
}
static
int
countNonZero8u
(
const
uchar
*
src
,
int
len
)
{
return
countNonZero_
(
src
,
len
);
}
...
...
@@ -1982,4 +2012,4 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
cv
::
extractImageCOI
(
imgB
,
b
);
return
!
maskarr
?
cv
::
norm
(
a
,
b
,
normType
)
:
cv
::
norm
(
a
,
b
,
normType
,
mask
);
}
}
\ No newline at end of file
modules/core/src/system.cpp
浏览文件 @
b782d8bb
...
...
@@ -205,6 +205,8 @@ IPPInitializer ippInitializer;
#endif
volatile
bool
USE_SSE2
=
featuresEnabled
.
have
[
CV_CPU_SSE2
];
volatile
bool
USE_SSE4_2
=
featuresEnabled
.
have
[
CV_CPU_SSE4_2
];
volatile
bool
USE_AVX
=
featuresEnabled
.
have
[
CV_CPU_AVX
];
void
setUseOptimized
(
bool
flag
)
{
...
...
@@ -921,4 +923,4 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID )
}
#endif
/* End of file. */
/* End of file. */
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录