Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
44d473fb
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
44d473fb
编写于
7月 08, 2020
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
上级
988bc804
5bc6b6fc
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
794 addition
and
300 deletion
+794
-300
modules/dnn/src/dnn.cpp
modules/dnn/src/dnn.cpp
+8
-6
modules/features2d/src/sift.simd.hpp
modules/features2d/src/sift.simd.hpp
+122
-158
modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
...ann/include/opencv2/flann/hierarchical_clustering_index.h
+5
-3
modules/imgproc/perf/opencl/perf_filters.cpp
modules/imgproc/perf/opencl/perf_filters.cpp
+56
-0
modules/imgproc/src/filter.dispatch.cpp
modules/imgproc/src/filter.dispatch.cpp
+177
-56
modules/imgproc/src/filter.hpp
modules/imgproc/src/filter.hpp
+17
-5
modules/imgproc/src/opencl/filterSepCol.cl
modules/imgproc/src/opencl/filterSepCol.cl
+13
-11
modules/imgproc/src/opencl/filterSepRow.cl
modules/imgproc/src/opencl/filterSepRow.cl
+7
-3
modules/imgproc/src/opencl/filterSep_singlePass.cl
modules/imgproc/src/opencl/filterSep_singlePass.cl
+9
-7
modules/imgproc/src/smooth.dispatch.cpp
modules/imgproc/src/smooth.dispatch.cpp
+44
-24
modules/imgproc/test/ocl/test_sepfilter2d.cpp
modules/imgproc/test/ocl/test_sepfilter2d.cpp
+33
-4
modules/stitching/src/exposure_compensate.cpp
modules/stitching/src/exposure_compensate.cpp
+5
-1
platforms/winpack_dldt/2020.4/20200413-dldt-pdb.patch
platforms/winpack_dldt/2020.4/20200413-dldt-pdb.patch
+14
-0
platforms/winpack_dldt/2020.4/20200604-dldt-disable-multidevice.patch
...npack_dldt/2020.4/20200604-dldt-disable-multidevice.patch
+13
-0
platforms/winpack_dldt/2020.4/20200701-dldt-disable-unused-targets.patch
...ck_dldt/2020.4/20200701-dldt-disable-unused-targets.patch
+173
-0
platforms/winpack_dldt/2020.4/patch.config.py
platforms/winpack_dldt/2020.4/patch.config.py
+3
-0
platforms/winpack_dldt/2020.4/sysroot.config.py
platforms/winpack_dldt/2020.4/sysroot.config.py
+56
-0
platforms/winpack_dldt/build_package.py
platforms/winpack_dldt/build_package.py
+39
-22
未找到文件。
modules/dnn/src/dnn.cpp
浏览文件 @
44d473fb
...
...
@@ -2744,8 +2744,7 @@ struct Net::Impl : public detail::NetImplBase
// (and so we eliminate the concatenation layer, because the channels
// are concatenated implicitly).
Ptr
<
ConcatLayer
>
concatLayer
=
ld
.
layerInstance
.
dynamicCast
<
ConcatLayer
>
();
if
(
!
concatLayer
.
empty
()
&&
concatLayer
->
axis
==
1
&&
!
concatLayer
->
padding
&&
ld
.
outputBlobs
.
size
()
==
1
)
if
(
!
concatLayer
.
empty
()
&&
!
concatLayer
->
padding
&&
ld
.
outputBlobs
.
size
()
==
1
)
{
Mat
&
output
=
ld
.
outputBlobs
[
0
];
UMat
umat_output
;
...
...
@@ -2782,7 +2781,8 @@ struct Net::Impl : public detail::NetImplBase
// the concatenation optimization is applied with batch_size > 1.
// so, for now, we only apply this optimization in the most popular
// case batch_size == 1.
if
(
output
.
dims
==
4
&&
output
.
size
[
0
]
==
1
)
int
axis
=
clamp
(
concatLayer
->
axis
,
output
.
dims
);
if
(
output
.
total
(
0
,
axis
)
==
1
)
{
size_t
i
,
ninputs
=
ld
.
inputBlobsId
.
size
();
std
::
vector
<
LayerPin
>
realinputs
(
ninputs
);
...
...
@@ -2836,18 +2836,20 @@ struct Net::Impl : public detail::NetImplBase
OpenCLBackendWrapper
::
update
(
ld
.
outputBlobsWrappers
,
umats
);
}
#endif
#ifdef HAVE_CUDA
if
(
preferableBackend
==
DNN_BACKEND_CUDA
)
ld
.
outputBlobsWrappers
[
0
]
=
wrap
(
output
);
#endif
Range
chrange
[]
=
{
Range
::
all
(),
Range
::
all
(),
Range
::
all
(),
Range
::
all
()
};
std
::
vector
<
Range
>
chrange
(
output
.
dims
,
Range
::
all
());
int
ofs
=
0
;
for
(
i
=
0
;
i
<
ninputs
;
i
++
)
{
LayerPin
pin
=
realinputs
[
i
];
LayerData
*
inp_i_data
=
&
layers
[
pin
.
lid
];
int
channels_i
=
ld
.
inputBlobs
[
i
]
->
size
[
1
];
chrange
[
1
]
=
Range
(
ofs
,
ofs
+
channels_i
);
int
channels_i
=
ld
.
inputBlobs
[
i
]
->
size
[
axis
];
chrange
[
axis
]
=
Range
(
ofs
,
ofs
+
channels_i
);
printf_
((
"
\t
output %s(%d) to channels (%d, %d)
\n
"
,
inp_i_data
->
layerInstance
->
name
.
c_str
(),
pin
.
oid
,
ofs
,
ofs
+
channels_i
));
ofs
+=
channels_i
;
...
...
modules/features2d/src/sift.simd.hpp
浏览文件 @
44d473fb
...
...
@@ -167,9 +167,23 @@ float calcOrientationHist(
int
i
,
j
,
k
,
len
=
(
radius
*
2
+
1
)
*
(
radius
*
2
+
1
);
float
expf_scale
=
-
1.
f
/
(
2.
f
*
sigma
*
sigma
);
#if CV_SIMD
AutoBuffer
<
float
>
bufX
(
len
+
v_float32
::
nlanes
);
AutoBuffer
<
float
>
bufY
(
len
+
v_float32
::
nlanes
);
AutoBuffer
<
float
>
bufO
(
len
+
v_float32
::
nlanes
);
AutoBuffer
<
float
>
bufW
(
len
+
v_float32
::
nlanes
);
AutoBuffer
<
float
>
bufT
(
n
+
4
+
v_float32
::
nlanes
);
float
*
X
=
alignPtr
(
bufX
.
data
(),
CV_SIMD_WIDTH
);
float
*
Y
=
alignPtr
(
bufY
.
data
(),
CV_SIMD_WIDTH
);
float
*
Mag
=
X
;
float
*
Ori
=
alignPtr
(
bufO
.
data
(),
CV_SIMD_WIDTH
);
float
*
W
=
alignPtr
(
bufW
.
data
(),
CV_SIMD_WIDTH
);
float
*
temphist
=
alignPtr
(
bufT
.
data
(),
CV_SIMD_WIDTH
)
+
2
;
#else
AutoBuffer
<
float
>
buf
(
len
*
4
+
n
+
4
);
float
*
X
=
buf
.
data
(),
*
Y
=
X
+
len
,
*
Mag
=
X
,
*
Ori
=
Y
+
len
,
*
W
=
Ori
+
len
;
float
*
temphist
=
W
+
len
+
2
;
#endif
for
(
i
=
0
;
i
<
n
;
i
++
)
temphist
[
i
]
=
0.
f
;
...
...
@@ -201,32 +215,29 @@ float calcOrientationHist(
cv
::
hal
::
magnitude32f
(
X
,
Y
,
Mag
,
len
);
k
=
0
;
#if CV_AVX2
#if CV_SIMD
const
int
vecsize
=
v_float32
::
nlanes
;
v_float32
nd360
=
vx_setall_f32
(
n
/
360.
f
);
v_int32
__n
=
vx_setall_s32
(
n
);
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
bin_buf
[
vecsize
];
float
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
w_mul_mag_buf
[
vecsize
];
for
(
;
k
<=
len
-
vecsize
;
k
+=
vecsize
)
{
__m256
__nd360
=
_mm256_set1_ps
(
n
/
360.
f
);
__m256i
__n
=
_mm256_set1_epi32
(
n
);
int
CV_DECL_ALIGNED
(
32
)
bin_buf
[
8
];
float
CV_DECL_ALIGNED
(
32
)
w_mul_mag_buf
[
8
];
for
(
;
k
<=
len
-
8
;
k
+=
8
)
v_float32
w
=
vx_load_aligned
(
W
+
k
);
v_float32
mag
=
vx_load_aligned
(
Mag
+
k
);
v_float32
ori
=
vx_load_aligned
(
Ori
+
k
);
v_int32
bin
=
v_round
(
nd360
*
ori
);
bin
=
v_select
(
bin
>=
__n
,
bin
-
__n
,
bin
);
bin
=
v_select
(
bin
<
vx_setzero_s32
(),
bin
+
__n
,
bin
);
w
=
w
*
mag
;
v_store_aligned
(
bin_buf
,
bin
);
v_store_aligned
(
w_mul_mag_buf
,
w
);
for
(
int
vi
=
0
;
vi
<
vecsize
;
vi
++
)
{
__m256i
__bin
=
_mm256_cvtps_epi32
(
_mm256_mul_ps
(
__nd360
,
_mm256_loadu_ps
(
&
Ori
[
k
])));
__bin
=
_mm256_sub_epi32
(
__bin
,
_mm256_andnot_si256
(
_mm256_cmpgt_epi32
(
__n
,
__bin
),
__n
));
__bin
=
_mm256_add_epi32
(
__bin
,
_mm256_and_si256
(
__n
,
_mm256_cmpgt_epi32
(
_mm256_setzero_si256
(),
__bin
)));
__m256
__w_mul_mag
=
_mm256_mul_ps
(
_mm256_loadu_ps
(
&
W
[
k
]),
_mm256_loadu_ps
(
&
Mag
[
k
]));
_mm256_store_si256
((
__m256i
*
)
bin_buf
,
__bin
);
_mm256_store_ps
(
w_mul_mag_buf
,
__w_mul_mag
);
temphist
[
bin_buf
[
0
]]
+=
w_mul_mag_buf
[
0
];
temphist
[
bin_buf
[
1
]]
+=
w_mul_mag_buf
[
1
];
temphist
[
bin_buf
[
2
]]
+=
w_mul_mag_buf
[
2
];
temphist
[
bin_buf
[
3
]]
+=
w_mul_mag_buf
[
3
];
temphist
[
bin_buf
[
4
]]
+=
w_mul_mag_buf
[
4
];
temphist
[
bin_buf
[
5
]]
+=
w_mul_mag_buf
[
5
];
temphist
[
bin_buf
[
6
]]
+=
w_mul_mag_buf
[
6
];
temphist
[
bin_buf
[
7
]]
+=
w_mul_mag_buf
[
7
];
temphist
[
bin_buf
[
vi
]]
+=
w_mul_mag_buf
[
vi
];
}
}
#endif
...
...
@@ -247,34 +258,20 @@ float calcOrientationHist(
temphist
[
n
+
1
]
=
temphist
[
1
];
i
=
0
;
#if CV_AVX2
#if CV_SIMD
v_float32
d_1_16
=
vx_setall_f32
(
1.
f
/
16.
f
);
v_float32
d_4_16
=
vx_setall_f32
(
4.
f
/
16.
f
);
v_float32
d_6_16
=
vx_setall_f32
(
6.
f
/
16.
f
);
for
(
;
i
<=
n
-
v_float32
::
nlanes
;
i
+=
v_float32
::
nlanes
)
{
__m256
__d_1_16
=
_mm256_set1_ps
(
1.
f
/
16.
f
);
__m256
__d_4_16
=
_mm256_set1_ps
(
4.
f
/
16.
f
);
__m256
__d_6_16
=
_mm256_set1_ps
(
6.
f
/
16.
f
);
for
(
;
i
<=
n
-
8
;
i
+=
8
)
{
#if CV_FMA3
__m256
__hist
=
_mm256_fmadd_ps
(
_mm256_add_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
-
2
]),
_mm256_loadu_ps
(
&
temphist
[
i
+
2
])),
__d_1_16
,
_mm256_fmadd_ps
(
_mm256_add_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
-
1
]),
_mm256_loadu_ps
(
&
temphist
[
i
+
1
])),
__d_4_16
,
_mm256_mul_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
]),
__d_6_16
)));
#else
__m256
__hist
=
_mm256_add_ps
(
_mm256_mul_ps
(
_mm256_add_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
-
2
]),
_mm256_loadu_ps
(
&
temphist
[
i
+
2
])),
__d_1_16
),
_mm256_add_ps
(
_mm256_mul_ps
(
_mm256_add_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
-
1
]),
_mm256_loadu_ps
(
&
temphist
[
i
+
1
])),
__d_4_16
),
_mm256_mul_ps
(
_mm256_loadu_ps
(
&
temphist
[
i
]),
__d_6_16
)));
#endif
_mm256_storeu_ps
(
&
hist
[
i
],
__hist
);
}
v_float32
tn2
=
vx_load_aligned
(
temphist
+
i
-
2
);
v_float32
tn1
=
vx_load
(
temphist
+
i
-
1
);
v_float32
t0
=
vx_load
(
temphist
+
i
);
v_float32
t1
=
vx_load
(
temphist
+
i
+
1
);
v_float32
t2
=
vx_load
(
temphist
+
i
+
2
);
v_float32
_hist
=
v_fma
(
tn2
+
t2
,
d_1_16
,
v_fma
(
tn1
+
t1
,
d_4_16
,
t0
*
d_6_16
));
v_store
(
hist
+
i
,
_hist
);
}
#endif
for
(
;
i
<
n
;
i
++
)
...
...
@@ -623,91 +620,65 @@ void calcSIFTDescriptor(
cv
::
hal
::
exp32f
(
W
,
W
,
len
);
k
=
0
;
#if CV_
AVX2
#if CV_
SIMD
{
int
CV_DECL_ALIGNED
(
32
)
idx_buf
[
8
];
float
CV_DECL_ALIGNED
(
32
)
rco_buf
[
64
];
const
__m256
__ori
=
_mm256_set1_ps
(
ori
);
const
__m256
__bins_per_rad
=
_mm256_set1_ps
(
bins_per_rad
);
const
__m256i
__n
=
_mm256_set1_epi32
(
n
);
for
(
;
k
<=
len
-
8
;
k
+=
8
)
const
int
vecsize
=
v_float32
::
nlanes
;
int
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
idx_buf
[
vecsize
];
float
CV_DECL_ALIGNED
(
CV_SIMD_WIDTH
)
rco_buf
[
8
*
vecsize
];
const
v_float32
__ori
=
vx_setall_f32
(
ori
);
const
v_float32
__bins_per_rad
=
vx_setall_f32
(
bins_per_rad
);
const
v_int32
__n
=
vx_setall_s32
(
n
);
const
v_int32
__1
=
vx_setall_s32
(
1
);
const
v_int32
__d_plus_2
=
vx_setall_s32
(
d
+
2
);
const
v_int32
__n_plus_2
=
vx_setall_s32
(
n
+
2
);
for
(
;
k
<=
len
-
vecsize
;
k
+=
vecsize
)
{
__m256
__rbin
=
_mm256_loadu_ps
(
&
RBin
[
k
]);
__m256
__cbin
=
_mm256_loadu_ps
(
&
CBin
[
k
]);
__m256
__obin
=
_mm256_mul_ps
(
_mm256_sub_ps
(
_mm256_loadu_ps
(
&
Ori
[
k
]),
__ori
),
__bins_per_rad
);
__m256
__mag
=
_mm256_mul_ps
(
_mm256_loadu_ps
(
&
Mag
[
k
]),
_mm256_loadu_ps
(
&
W
[
k
]));
__m256
__r0
=
_mm256_floor_ps
(
__rbin
);
__rbin
=
_mm256_sub_ps
(
__rbin
,
__r0
);
__m256
__c0
=
_mm256_floor_ps
(
__cbin
);
__cbin
=
_mm256_sub_ps
(
__cbin
,
__c0
);
__m256
__o0
=
_mm256_floor_ps
(
__obin
);
__obin
=
_mm256_sub_ps
(
__obin
,
__o0
);
__m256i
__o0i
=
_mm256_cvtps_epi32
(
__o0
);
__o0i
=
_mm256_add_epi32
(
__o0i
,
_mm256_and_si256
(
__n
,
_mm256_cmpgt_epi32
(
_mm256_setzero_si256
(),
__o0i
)));
__o0i
=
_mm256_sub_epi32
(
__o0i
,
_mm256_andnot_si256
(
_mm256_cmpgt_epi32
(
__n
,
__o0i
),
__n
));
__m256
__v_r1
=
_mm256_mul_ps
(
__mag
,
__rbin
);
__m256
__v_r0
=
_mm256_sub_ps
(
__mag
,
__v_r1
);
__m256
__v_rc11
=
_mm256_mul_ps
(
__v_r1
,
__cbin
);
__m256
__v_rc10
=
_mm256_sub_ps
(
__v_r1
,
__v_rc11
);
__m256
__v_rc01
=
_mm256_mul_ps
(
__v_r0
,
__cbin
);
__m256
__v_rc00
=
_mm256_sub_ps
(
__v_r0
,
__v_rc01
);
__m256
__v_rco111
=
_mm256_mul_ps
(
__v_rc11
,
__obin
);
__m256
__v_rco110
=
_mm256_sub_ps
(
__v_rc11
,
__v_rco111
);
__m256
__v_rco101
=
_mm256_mul_ps
(
__v_rc10
,
__obin
);
__m256
__v_rco100
=
_mm256_sub_ps
(
__v_rc10
,
__v_rco101
);
__m256
__v_rco011
=
_mm256_mul_ps
(
__v_rc01
,
__obin
);
__m256
__v_rco010
=
_mm256_sub_ps
(
__v_rc01
,
__v_rco011
);
__m256
__v_rco001
=
_mm256_mul_ps
(
__v_rc00
,
__obin
);
__m256
__v_rco000
=
_mm256_sub_ps
(
__v_rc00
,
__v_rco001
);
__m256i
__one
=
_mm256_set1_epi32
(
1
);
__m256i
__idx
=
_mm256_add_epi32
(
_mm256_mullo_epi32
(
_mm256_add_epi32
(
_mm256_mullo_epi32
(
_mm256_add_epi32
(
_mm256_cvtps_epi32
(
__r0
),
__one
),
_mm256_set1_epi32
(
d
+
2
)),
_mm256_add_epi32
(
_mm256_cvtps_epi32
(
__c0
),
__one
)),
_mm256_set1_epi32
(
n
+
2
)),
__o0i
);
_mm256_store_si256
((
__m256i
*
)
idx_buf
,
__idx
);
_mm256_store_ps
(
&
(
rco_buf
[
0
]),
__v_rco000
);
_mm256_store_ps
(
&
(
rco_buf
[
8
]),
__v_rco001
);
_mm256_store_ps
(
&
(
rco_buf
[
16
]),
__v_rco010
);
_mm256_store_ps
(
&
(
rco_buf
[
24
]),
__v_rco011
);
_mm256_store_ps
(
&
(
rco_buf
[
32
]),
__v_rco100
);
_mm256_store_ps
(
&
(
rco_buf
[
40
]),
__v_rco101
);
_mm256_store_ps
(
&
(
rco_buf
[
48
]),
__v_rco110
);
_mm256_store_ps
(
&
(
rco_buf
[
56
]),
__v_rco111
);
#define HIST_SUM_HELPER(id) \
hist[idx_buf[(id)]] += rco_buf[(id)]; \
hist[idx_buf[(id)]+1] += rco_buf[8 + (id)]; \
hist[idx_buf[(id)]+(n+2)] += rco_buf[16 + (id)]; \
hist[idx_buf[(id)]+(n+3)] += rco_buf[24 + (id)]; \
hist[idx_buf[(id)]+(d+2)*(n+2)] += rco_buf[32 + (id)]; \
hist[idx_buf[(id)]+(d+2)*(n+2)+1] += rco_buf[40 + (id)]; \
hist[idx_buf[(id)]+(d+3)*(n+2)] += rco_buf[48 + (id)]; \
hist[idx_buf[(id)]+(d+3)*(n+2)+1] += rco_buf[56 + (id)];
HIST_SUM_HELPER
(
0
);
HIST_SUM_HELPER
(
1
);
HIST_SUM_HELPER
(
2
);
HIST_SUM_HELPER
(
3
);
HIST_SUM_HELPER
(
4
);
HIST_SUM_HELPER
(
5
);
HIST_SUM_HELPER
(
6
);
HIST_SUM_HELPER
(
7
);
#undef HIST_SUM_HELPER
v_float32
rbin
=
vx_load
(
RBin
+
k
);
v_float32
cbin
=
vx_load
(
CBin
+
k
);
v_float32
obin
=
(
vx_load
(
Ori
+
k
)
-
__ori
)
*
__bins_per_rad
;
v_float32
mag
=
vx_load
(
Mag
+
k
)
*
vx_load
(
W
+
k
);
v_int32
r0
=
v_floor
(
rbin
);
v_int32
c0
=
v_floor
(
cbin
);
v_int32
o0
=
v_floor
(
obin
);
rbin
-=
v_cvt_f32
(
r0
);
cbin
-=
v_cvt_f32
(
c0
);
obin
-=
v_cvt_f32
(
o0
);
o0
=
v_select
(
o0
<
vx_setzero_s32
(),
o0
+
__n
,
o0
);
o0
=
v_select
(
o0
>=
__n
,
o0
-
__n
,
o0
);
v_float32
v_r1
=
mag
*
rbin
,
v_r0
=
mag
-
v_r1
;
v_float32
v_rc11
=
v_r1
*
cbin
,
v_rc10
=
v_r1
-
v_rc11
;
v_float32
v_rc01
=
v_r0
*
cbin
,
v_rc00
=
v_r0
-
v_rc01
;
v_float32
v_rco111
=
v_rc11
*
obin
,
v_rco110
=
v_rc11
-
v_rco111
;
v_float32
v_rco101
=
v_rc10
*
obin
,
v_rco100
=
v_rc10
-
v_rco101
;
v_float32
v_rco011
=
v_rc01
*
obin
,
v_rco010
=
v_rc01
-
v_rco011
;
v_float32
v_rco001
=
v_rc00
*
obin
,
v_rco000
=
v_rc00
-
v_rco001
;
v_int32
idx
=
v_fma
(
v_fma
(
r0
+
__1
,
__d_plus_2
,
c0
+
__1
),
__n_plus_2
,
o0
);
v_store_aligned
(
idx_buf
,
idx
);
v_store_aligned
(
rco_buf
,
v_rco000
);
v_store_aligned
(
rco_buf
+
vecsize
,
v_rco001
);
v_store_aligned
(
rco_buf
+
vecsize
*
2
,
v_rco010
);
v_store_aligned
(
rco_buf
+
vecsize
*
3
,
v_rco011
);
v_store_aligned
(
rco_buf
+
vecsize
*
4
,
v_rco100
);
v_store_aligned
(
rco_buf
+
vecsize
*
5
,
v_rco101
);
v_store_aligned
(
rco_buf
+
vecsize
*
6
,
v_rco110
);
v_store_aligned
(
rco_buf
+
vecsize
*
7
,
v_rco111
);
for
(
int
id
=
0
;
id
<
vecsize
;
id
++
)
{
hist
[
idx_buf
[
id
]]
+=
rco_buf
[
id
];
hist
[
idx_buf
[
id
]
+
1
]
+=
rco_buf
[
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
n
+
2
)]
+=
rco_buf
[
2
*
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
n
+
3
)]
+=
rco_buf
[
3
*
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
d
+
2
)
*
(
n
+
2
)]
+=
rco_buf
[
4
*
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
d
+
2
)
*
(
n
+
2
)
+
1
]
+=
rco_buf
[
5
*
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
d
+
3
)
*
(
n
+
2
)]
+=
rco_buf
[
6
*
vecsize
+
id
];
hist
[
idx_buf
[
id
]
+
(
d
+
3
)
*
(
n
+
2
)
+
1
]
+=
rco_buf
[
7
*
vecsize
+
id
];
}
}
}
#endif
...
...
@@ -766,23 +737,16 @@ void calcSIFTDescriptor(
float
nrm2
=
0
;
len
=
d
*
d
*
n
;
k
=
0
;
#if CV_
AVX2
#if CV_
SIMD
{
float
CV_DECL_ALIGNED
(
32
)
nrm2_buf
[
8
];
__m256
__nrm2
=
_mm256_setzero_ps
();
__m256
__dst
;
for
(
;
k
<=
len
-
8
;
k
+=
8
)
v_float32
__nrm2
=
vx_setzero_f32
();
v_float32
__dst
;
for
(
;
k
<=
len
-
v_float32
::
nlanes
;
k
+=
v_float32
::
nlanes
)
{
__dst
=
_mm256_loadu_ps
(
&
dst
[
k
]);
#if CV_FMA3
__nrm2
=
_mm256_fmadd_ps
(
__dst
,
__dst
,
__nrm2
);
#else
__nrm2
=
_mm256_add_ps
(
__nrm2
,
_mm256_mul_ps
(
__dst
,
__dst
));
#endif
__dst
=
vx_load
(
dst
+
k
);
__nrm2
=
v_fma
(
__dst
,
__dst
,
__nrm2
);
}
_mm256_store_ps
(
nrm2_buf
,
__nrm2
);
nrm2
=
nrm2_buf
[
0
]
+
nrm2_buf
[
1
]
+
nrm2_buf
[
2
]
+
nrm2_buf
[
3
]
+
nrm2_buf
[
4
]
+
nrm2_buf
[
5
]
+
nrm2_buf
[
6
]
+
nrm2_buf
[
7
];
nrm2
=
(
float
)
v_reduce_sum
(
__nrm2
);
}
#endif
for
(
;
k
<
len
;
k
++
)
...
...
@@ -795,7 +759,7 @@ void calcSIFTDescriptor(
// This code cannot be enabled because it sums nrm2 in a different order,
// thus producing slightly different results
{
float CV_DECL_ALIGNED(
32
) nrm2_buf[8];
float CV_DECL_ALIGNED(
CV_SIMD_WIDTH
) nrm2_buf[8];
__m256 __dst;
__m256 __nrm2 = _mm256_setzero_ps();
__m256 __thr = _mm256_set1_ps(thr);
...
...
@@ -825,17 +789,17 @@ void calcSIFTDescriptor(
#if 1
k
=
0
;
#if CV_
AVX2
#if CV_
SIMD
{
__m256
__dst
;
__m256
__min
=
_mm256_setzero_ps
();
__m256
__max
=
_mm256_set1_ps
(
255.0
f
);
// max of uchar
__m256
__nrm2
=
_mm256_set1_ps
(
nrm2
);
for
(
k
=
0
;
k
<=
len
-
8
;
k
+=
8
)
v_float32
__dst
;
v_float32
__min
=
vx_setzero_f32
();
v_float32
__max
=
vx_setall_f32
(
255.0
f
);
// max of uchar
v_float32
__nrm2
=
vx_setall_f32
(
nrm2
);
for
(
k
=
0
;
k
<=
len
-
v_float32
::
nlanes
;
k
+=
v_float32
::
nlanes
)
{
__dst
=
_mm256_loadu_ps
(
&
dst
[
k
]
);
__dst
=
_mm256_min_ps
(
_mm256_max_ps
(
_mm256_round_ps
(
_mm256_mul_ps
(
__dst
,
__nrm2
),
_MM_FROUND_TO_NEAREST_INT
|
_MM_FROUND_NO_EXC
),
__min
),
__max
);
_mm256_storeu_ps
(
&
dst
[
k
]
,
__dst
);
__dst
=
vx_load
(
dst
+
k
);
__dst
=
v_min
(
v_max
(
v_cvt_f32
(
v_round
(
__dst
*
__nrm2
)
),
__min
),
__max
);
v_store
(
dst
+
k
,
__dst
);
}
}
#endif
...
...
modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
浏览文件 @
44d473fb
...
...
@@ -547,7 +547,7 @@ public:
void
findNeighbors
(
ResultSet
<
DistanceType
>&
result
,
const
ElementType
*
vec
,
const
SearchParams
&
searchParams
)
CV_OVERRIDE
{
int
maxChecks
=
get_param
(
searchParams
,
"checks"
,
32
);
const
int
maxChecks
=
get_param
(
searchParams
,
"checks"
,
32
);
// Priority queue storing intermediate branches in the best-bin-first search
Heap
<
BranchSt
>*
heap
=
new
Heap
<
BranchSt
>
((
int
)
size_
);
...
...
@@ -556,6 +556,8 @@ public:
int
checks
=
0
;
for
(
int
i
=
0
;
i
<
trees_
;
++
i
)
{
findNN
(
root
[
i
],
result
,
vec
,
checks
,
maxChecks
,
heap
,
checked
);
if
((
checks
>=
maxChecks
)
&&
result
.
full
())
break
;
}
BranchSt
branch
;
...
...
@@ -747,8 +749,8 @@ private:
Heap
<
BranchSt
>*
heap
,
std
::
vector
<
bool
>&
checked
)
{
if
(
node
->
childs
==
NULL
)
{
if
(
checks
>=
maxChecks
)
{
if
(
result
.
full
())
return
;
if
(
(
checks
>=
maxChecks
)
&&
result
.
full
()
)
{
return
;
}
for
(
int
i
=
0
;
i
<
node
->
size
;
++
i
)
{
int
index
=
node
->
indices
[
i
];
...
...
modules/imgproc/perf/opencl/perf_filters.cpp
浏览文件 @
44d473fb
...
...
@@ -313,6 +313,62 @@ OCL_PERF_TEST_P(Filter2DFixture, Filter2D,
SANITY_CHECK
(
dst
,
eps
);
}
///////////// SepFilter2D /////////////
typedef
FilterFixture
OCL_SepFilter2D
;
PERF_TEST_P_
(
OCL_SepFilter2D
,
SepFilter2D
)
{
const
FilterParams
&
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
int
type
=
get
<
1
>
(
params
),
ksize
=
get
<
2
>
(
params
);
checkDeviceMaxMemoryAllocSize
(
srcSize
,
type
);
UMat
src
(
srcSize
,
type
),
dst
(
srcSize
,
type
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
Mat
kernelX
(
1
,
ksize
,
CV_32FC1
);
randu
(
kernelX
,
-
3.0
,
3.0
);
Mat
kernelY
(
1
,
ksize
,
CV_32FC1
);
randu
(
kernelY
,
-
3.0
,
3.0
);
OCL_TEST_CYCLE
()
cv
::
sepFilter2D
(
src
,
dst
,
-
1
,
kernelX
,
kernelY
,
cv
::
Point
(
-
1
,
-
1
),
1.0
f
,
cv
::
BORDER_CONSTANT
);
SANITY_CHECK_NOTHING
();
}
PERF_TEST_P_
(
OCL_SepFilter2D
,
SepFilter2D_BitExact
)
{
const
FilterParams
&
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
int
type
=
get
<
1
>
(
params
),
ksize
=
get
<
2
>
(
params
);
checkDeviceMaxMemoryAllocSize
(
srcSize
,
type
);
UMat
src
(
srcSize
,
type
),
dst
(
srcSize
,
type
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
Mat
kernelX
(
1
,
ksize
,
CV_32SC1
);
randu
(
kernelX
,
-
16.0
,
16.0
);
kernelX
.
convertTo
(
kernelX
,
CV_32FC1
,
1
/
16.0
f
,
0
);
Mat
kernelY
(
1
,
ksize
,
CV_32SC1
);
randu
(
kernelY
,
-
16.0
,
16.0
);
kernelY
.
convertTo
(
kernelY
,
CV_32FC1
,
1
/
16.0
f
,
0
);
OCL_TEST_CYCLE
()
cv
::
sepFilter2D
(
src
,
dst
,
-
1
,
kernelX
,
kernelY
,
cv
::
Point
(
-
1
,
-
1
),
1.0
f
,
cv
::
BORDER_CONSTANT
);
SANITY_CHECK_NOTHING
();
}
INSTANTIATE_TEST_CASE_P
(
/*nothing*/
,
OCL_SepFilter2D
,
::
testing
::
Combine
(
::
testing
::
Values
(
sz1080p
),
OCL_TEST_TYPES
,
OCL_PERF_ENUM
(
3
,
5
,
7
,
9
,
11
)
)
);
///////////// Bilateral ////////////////////////
typedef
TestBaseWithParam
<
Size
>
BilateralFixture
;
...
...
modules/imgproc/src/filter.dispatch.cpp
浏览文件 @
44d473fb
...
...
@@ -729,11 +729,12 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
const
int
shift_bits
=
8
;
static
bool
ocl_sepRowFilter2D
(
const
UMat
&
src
,
UMat
&
buf
,
const
Mat
&
kernelX
,
int
anchor
,
int
borderType
,
int
ddepth
,
bool
fast8uc1
,
bool
int_arithm
)
int
borderType
,
int
ddepth
,
bool
fast8uc1
,
bool
int_arithm
,
int
shift_bits
)
{
CV_Assert
(
shift_bits
==
0
||
int_arithm
);
int
type
=
src
.
type
(),
cn
=
CV_MAT_CN
(
type
),
sdepth
=
CV_MAT_DEPTH
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
Size
bufSize
=
buf
.
size
();
...
...
@@ -801,8 +802,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
const
Mat
&
kernelY
,
double
delta
,
int
anchor
,
bool
int_arithm
)
static
bool
ocl_sepColFilter2D
(
const
UMat
&
buf
,
UMat
&
dst
,
const
Mat
&
kernelY
,
double
delta
,
int
anchor
,
bool
int_arithm
,
int
shift_bits
)
{
CV_Assert
(
shift_bits
==
0
||
int_arithm
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
dst
.
depth
()
==
CV_64F
&&
!
doubleSupport
)
return
false
;
...
...
@@ -821,13 +825,16 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
localsize
[
1
])
*
localsize
[
1
];
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
localsize
[
0
])
*
localsize
[
0
];
char
cvt
[
40
];
char
cvt
[
2
][
40
];
int
floatT
=
std
::
max
(
CV_32F
,
bdepth
);
cv
::
String
build_options
=
cv
::
format
(
"-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
" -D srcT=%s -D dstT=%s -D convertToDstT=%s"
" -D srcT=%s -D dstT=%s -D convertTo
FloatT=%s -D floatT=%s -D convertTo
DstT=%s"
" -D srcT1=%s -D dstT1=%s -D SHIFT_BITS=%d%s%s"
,
anchor
,
(
int
)
localsize
[
0
],
(
int
)
localsize
[
1
],
cn
,
ocl
::
typeToStr
(
buf_type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
bdepth
,
ddepth
,
cn
,
cvt
),
ocl
::
convertTypeStr
(
bdepth
,
floatT
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
CV_MAKETYPE
(
floatT
,
cn
)),
ocl
::
convertTypeStr
(
shift_bits
?
floatT
:
bdepth
,
ddepth
,
cn
,
cvt
[
1
]),
ocl
::
typeToStr
(
bdepth
),
ocl
::
typeToStr
(
ddepth
),
2
*
shift_bits
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
int_arithm
?
" -D INTEGER_ARITHMETIC"
:
""
);
...
...
@@ -839,7 +846,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
buf
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
delta
));
static_cast
<
float
>
(
delta
*
(
1u
<<
(
2
*
shift_bits
))
));
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
...
...
@@ -848,16 +855,21 @@ const int optimizedSepFilterLocalWidth = 16;
const
int
optimizedSepFilterLocalHeight
=
8
;
static
bool
ocl_sepFilter2D_SinglePass
(
InputArray
_src
,
OutputArray
_dst
,
Mat
row_kernel
,
Mat
col_kernel
,
double
delta
,
int
borderType
,
int
ddepth
,
int
bdepth
,
bool
int_arithm
)
const
Mat
&
kernelX_
,
const
Mat
&
kernelY_
,
double
delta
,
int
borderType
,
int
ddepth
,
int
bdepth
,
bool
int_arithm
,
int
shift_bits
)
{
Size
size
=
_src
.
size
(),
wholeSize
;
Point
origin
;
//CV_Assert(shift_bits == 0 || int_arithm);
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
size
=
_src
.
size
();
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
esz
=
CV_ELEM_SIZE
(
stype
),
wdepth
=
std
::
max
(
std
::
max
(
sdepth
,
ddepth
),
bdepth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
size_t
src_step
=
_src
.
step
(),
src_offset
=
_src
.
offset
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
esz
==
0
||
src_step
==
0
||
(
src_offset
%
src_step
)
%
esz
!=
0
...
...
@@ -869,6 +881,13 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
||
borderType
==
BORDER_REFLECT_101
))
return
false
;
Mat
kernelX
,
kernelY
;
kernelX_
.
convertTo
(
kernelX
,
wdepth
);
if
(
kernelX_
.
data
!=
kernelY_
.
data
)
kernelY_
.
convertTo
(
kernelY
,
wdepth
);
else
kernelY
=
kernelX
;
size_t
lt2
[
2
]
=
{
optimizedSepFilterLocalWidth
,
optimizedSepFilterLocalHeight
};
size_t
gt2
[
2
]
=
{
lt2
[
0
]
*
(
1
+
(
size
.
width
-
1
)
/
lt2
[
0
]),
lt2
[
1
]};
...
...
@@ -879,9 +898,9 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
" -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s"
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
row_kernel
.
cols
/
2
,
col_kernel
.
cols
/
2
,
ocl
::
kernelToStr
(
row_kernel
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
col_kernel
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
kernelX
.
cols
/
2
,
kernelY
.
cols
/
2
,
ocl
::
kernelToStr
(
kernelX
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
kernelY
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
typeToStr
(
stype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
],
...
...
@@ -896,21 +915,30 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
_dst
.
create
(
size
,
dtype
);
UMat
dst
=
_dst
.
getUMat
();
int
src_offset_x
=
static_cast
<
int
>
((
src_offset
%
src_step
)
/
esz
);
int
src_offset_y
=
static_cast
<
int
>
(
src_offset
/
src_step
);
// TODO Future: emit error on inplace processing
//CV_Assert(src.u != dst.u && "Inplace processing is not allowed with UMat");
if
(
src
.
u
==
dst
.
u
)
{
CV_LOG_ONCE_WARNING
(
NULL
,
"sepFilter2D: inplace arguments are not allowed for non-inplace operations. Performance impact warning."
);
src
=
src
.
clone
();
}
Size
wholeSize
;
Point
origin
;
src
.
locateROI
(
wholeSize
,
origin
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
src_offset_x
,
src_offset_
y
,
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
origin
.
x
,
origin
.
y
,
wholeSize
.
height
,
wholeSize
.
width
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
delta
));
static_cast
<
float
>
(
delta
*
(
1u
<<
(
2
*
shift_bits
))
));
return
k
.
run
(
2
,
gt2
,
lt2
,
false
);
}
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
)
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
imgSize
=
_src
.
size
();
...
...
@@ -934,59 +962,152 @@ bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
if
(
anchor
.
y
<
0
)
anchor
.
y
=
kernelY
.
cols
>>
1
;
int
rtype
=
getKernelType
(
kernelX
,
kernelX
.
rows
==
1
?
Point
(
anchor
.
x
,
0
)
:
Point
(
0
,
anchor
.
x
));
int
ctype
=
getKernelType
(
kernelY
,
kernelY
.
rows
==
1
?
Point
(
anchor
.
y
,
0
)
:
Point
(
0
,
anchor
.
y
));
int
bdepth
=
CV_32F
;
bool
int_arithm
=
false
;
i
f
(
sdepth
==
CV_8U
&&
ddepth
==
CV_8U
&&
rtype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
&&
ctype
==
KERNEL_SMOOTH
+
KERNEL_SYMMETRICAL
)
i
nt
shift_bits
=
0
;
while
(
sdepth
==
CV_8U
&&
ddepth
==
CV_8U
)
{
if
(
ocl
::
Device
::
getDefault
().
isIntel
())
int
bits_
=
8
;
if
(
delta
*
256.0
f
!=
(
float
)(
int
)(
delta
*
256
))
{
for
(
int
i
=
0
;
i
<
kernelX
.
cols
;
i
++
)
kernelX
.
at
<
float
>
(
0
,
i
)
=
(
float
)
cvRound
(
kernelX
.
at
<
float
>
(
0
,
i
)
*
(
1
<<
shift_bits
));
if
(
kernelX
.
data
!=
kernelY
.
data
)
for
(
int
i
=
0
;
i
<
kernelX
.
cols
;
i
++
)
kernelY
.
at
<
float
>
(
0
,
i
)
=
(
float
)
cvRound
(
kernelY
.
at
<
float
>
(
0
,
i
)
*
(
1
<<
shift_bits
));
}
else
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact delta can't be applied: delta="
<<
delta
);
break
;
}
Mat
kernelX_BitExact
,
kernelY_BitExact
;
bool
isValidBitExactRowKernel
=
createBitExactKernel_32S
(
kernelX
,
kernelX_BitExact
,
bits_
);
bool
isValidBitExactColumnKernel
=
createBitExactKernel_32S
(
kernelY
,
kernelY_BitExact
,
bits_
);
if
(
!
isValidBitExactRowKernel
)
{
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact row-kernel can't be applied: ksize="
<<
kernelX_BitExact
.
total
());
}
else
if
(
!
isValidBitExactColumnKernel
)
{
CV_LOG_DEBUG
(
NULL
,
"ocl_sepFilter2D: bit-exact column-kernel can't be applied: ksize="
<<
kernelY_BitExact
.
total
());
}
else
{
bdepth
=
CV_32S
;
kernelX
.
convertTo
(
kernelX
,
bdepth
,
1
<<
shift_bits
);
kernelY
.
convertTo
(
kernelY
,
bdepth
,
1
<<
shift_bits
);
shift_bits
=
bits_
;
int_arithm
=
true
;
kernelX
=
kernelX_BitExact
;
kernelY
=
kernelY_BitExact
;
}
int_arithm
=
true
;
break
;
}
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
bdepth
,
int_arithm
),
true
)
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
CV_32F
,
// force FP32 mode
false
,
shift_bits
),
true
);
UMat
src
=
_src
.
getUMat
();
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
bool
fast8uc1
=
type
==
CV_8UC1
&&
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
bool
fast8uc1
=
false
;
if
(
type
==
CV_8UC1
)
{
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
fast8uc1
=
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
}
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
UMat
buf
(
bufSize
,
CV_MAKETYPE
(
bdepth
,
cn
));
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
int_arithm
,
shift_bits
))
return
false
;
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
int_arithm
,
shift_bits
);
}
bool
ocl_sepFilter2D_BitExact
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
const
Size
&
ksize
,
const
uint16_t
*
fkx
,
const
uint16_t
*
fky
,
Point
anchor
,
double
delta
,
int
borderType
,
int
shift_bits
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
Size
imgSize
=
_src
.
size
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
cn
>
4
)
return
false
;
if
(
ksize
.
width
%
2
!=
1
)
return
false
;
if
(
ksize
.
height
%
2
!=
1
)
return
false
;
Mat
kernelX
(
1
,
ksize
.
width
,
CV_16SC1
,
(
void
*
)
fkx
);
Mat
kernelY
(
1
,
ksize
.
height
,
CV_16SC1
,
(
void
*
)
fky
);
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
anchor
.
x
<
0
)
anchor
.
x
=
kernelX
.
cols
>>
1
;
if
(
anchor
.
y
<
0
)
anchor
.
y
=
kernelY
.
cols
>>
1
;
int
bdepth
=
sdepth
==
CV_8U
?
CV_32S
:
CV_32F
;
CV_OCL_RUN_
(
kernelY
.
cols
<=
21
&&
kernelX
.
cols
<=
21
&&
imgSize
.
width
>
optimizedSepFilterLocalWidth
+
anchor
.
x
&&
imgSize
.
height
>
optimizedSepFilterLocalHeight
+
anchor
.
y
&&
(
!
(
borderType
&
BORDER_ISOLATED
)
||
_src
.
offset
()
==
0
)
&&
anchor
==
Point
(
kernelX
.
cols
>>
1
,
kernelY
.
cols
>>
1
)
&&
OCL_PERFORMANCE_CHECK
(
d
.
isIntel
()),
// TODO FIXIT
ocl_sepFilter2D_SinglePass
(
_src
,
_dst
,
kernelX
,
kernelY
,
delta
,
borderType
&
~
BORDER_ISOLATED
,
ddepth
,
bdepth
,
true
,
shift_bits
),
true
);
UMat
src
=
_src
.
getUMat
();
bool
fast8uc1
=
false
;
if
(
type
==
CV_8UC1
)
{
Size
srcWholeSize
;
Point
srcOffset
;
src
.
locateROI
(
srcWholeSize
,
srcOffset
);
fast8uc1
=
srcOffset
.
x
%
4
==
0
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
;
}
Size
srcSize
=
src
.
size
();
Size
bufSize
(
srcSize
.
width
,
srcSize
.
height
+
kernelY
.
cols
-
1
);
UMat
buf
(
bufSize
,
CV_MAKETYPE
(
bdepth
,
cn
));
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
int_arithm
))
if
(
!
ocl_sepRowFilter2D
(
src
,
buf
,
kernelX
,
anchor
.
x
,
borderType
,
ddepth
,
fast8uc1
,
true
,
shift_bits
))
return
false
;
_dst
.
create
(
srcSize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
int_arithm
);
return
ocl_sepColFilter2D
(
buf
,
dst
,
kernelY
,
delta
,
anchor
.
y
,
true
,
shift_bits
);
}
#endif
...
...
@@ -1444,7 +1565,7 @@ void sepFilter2D(InputArray _src, OutputArray _dst, int ddepth,
CV_Assert
(
!
_kernelX
.
empty
());
CV_Assert
(
!
_kernelY
.
empty
());
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
_kernelY
.
total
()
&&
(
size_t
)
_src
.
cols
()
>
_kernelX
.
total
(),
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
=
_kernelY
.
total
()
&&
(
size_t
)
_src
.
cols
()
>=
_kernelX
.
total
(),
ocl_sepFilter2D
(
_src
,
_dst
,
ddepth
,
_kernelX
,
_kernelY
,
anchor
,
delta
,
borderType
))
Mat
src
=
_src
.
getMat
(),
kernelX
=
_kernelX
.
getMat
(),
kernelY
=
_kernelY
.
getMat
();
...
...
modules/imgproc/src/filter.hpp
浏览文件 @
44d473fb
...
...
@@ -46,13 +46,25 @@
namespace
cv
{
#ifdef HAVE_OPENCL
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
);
bool
ocl_sepFilter2D
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
Point
anchor
,
double
delta
,
int
borderType
);
bool
ocl_sepFilter2D_BitExact
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
const
Size
&
ksize
,
const
uint16_t
*
fkx
,
const
uint16_t
*
fky
,
Point
anchor
,
double
delta
,
int
borderType
,
int
shift_bits
);
#endif
void
preprocess2DKernel
(
const
Mat
&
kernel
,
std
::
vector
<
Point
>&
coords
,
std
::
vector
<
uchar
>&
coeffs
);
}
void
preprocess2DKernel
(
const
Mat
&
kernel
,
std
::
vector
<
Point
>&
coords
,
std
::
vector
<
uchar
>&
coeffs
);
}
// namespace
#endif
...
...
modules/imgproc/src/opencl/filterSepCol.cl
浏览文件 @
44d473fb
...
...
@@ -61,7 +61,11 @@
#
endif
#
define
DIG
(
a
)
a,
#
if
defined
(
INTEGER_ARITHMETIC
)
__constant
int
mat_kernel[]
=
{
COEFF
}
;
#
else
__constant
srcT1
mat_kernel[]
=
{
COEFF
}
;
#
endif
__kernel
void
col_filter
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_whole_rows,
int
src_whole_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
float
delta
)
...
...
@@ -92,30 +96,28 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
read
pixels
from
lds
and
calculate
the
result
sum
=
LDS_DAT[l_y
+
RADIUSY][l_x]
*
mat_kernel[RADIUSY]
;
sum
=
LDS_DAT[l_y
+
RADIUSY][l_x]
*
mat_kernel[RADIUSY]
+
(
srcT
)
delta
;
for
(
int
i
=
1
; i <= RADIUSY; ++i)
{
temp[0]
=
LDS_DAT[l_y
+
RADIUSY
-
i][l_x]
;
temp[1]
=
LDS_DAT[l_y
+
RADIUSY
+
i][l_x]
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
temp[0],mat_kernel[RADIUSY
-
i],
temp[1]
*
mat_kernel[RADIUSY
+
i]
)
;
#
else
sum
+=
mad
(
temp[0],
mat_kernel[RADIUSY
-
i],
temp[1]
*
mat_kernel[RADIUSY
+
i]
)
;
#
endif
}
#
ifdef
INTEGER_ARITHMETIC
#
ifdef
INTEL_DEVICE
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
/
(
1
<<
SHIFT_BITS
)
;
#
else
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
>>
SHIFT_BITS
;
#
endif
#
endif
//
write
the
result
to
dst
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
#
if
defined
(
SHIFT_BITS
)
&&
SHIFT_BITS
>
0
dstT
result
=
convertToDstT
(
convertToFloatT
(
sum
)
*
(
floatT
)(
1.0f
/
(
1
<<
SHIFT_BITS
)))
;
#
else
dstT
result
=
convertToDstT
(
sum
)
;
#
endif
start_addr
=
mad24
(
y,
dst_step,
mad24
(
DSTSIZE,
x,
dst_offset
))
;
storepix
(
convertToDstT
(
sum
+
(
srcT
)(
delta
))
,
dst
+
start_addr
)
;
storepix
(
result
,
dst
+
start_addr
)
;
}
}
modules/imgproc/src/opencl/filterSepRow.cl
浏览文件 @
44d473fb
...
...
@@ -139,9 +139,13 @@
#
endif
#
define
DIG
(
a
)
a,
#
if
defined
(
INTEGER_ARITHMETIC
)
__constant
int
mat_kernel[]
=
{
COEFF
}
;
#
else
__constant
dstT1
mat_kernel[]
=
{
COEFF
}
;
#
endif
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
#
define
dstT4
int4
#
define
convertDstVec
convert_int4
#
else
...
...
@@ -263,7 +267,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
{
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
else
sum
+=
mad
(
convertDstVec
(
temp[0]
)
,
mat_kernel[RADIUSX-i],
convertDstVec
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
...
...
@@ -368,7 +372,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse
{
temp[0]
=
LDS_DAT[l_y][l_x
+
RADIUSX
-
i]
;
temp[1]
=
LDS_DAT[l_y][l_x
+
RADIUSX
+
i]
;
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
+=
mad24
(
convertToDstT
(
temp[0]
)
,
mat_kernel[RADIUSX
-
i],
convertToDstT
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
#
else
sum
+=
mad
(
convertToDstT
(
temp[0]
)
,
mat_kernel[RADIUSX
-
i],
convertToDstT
(
temp[1]
)
*
mat_kernel[RADIUSX
+
i]
)
;
...
...
modules/imgproc/src/opencl/filterSep_singlePass.cl
浏览文件 @
44d473fb
...
...
@@ -160,7 +160,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
{
sum
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUSY; i++)
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
=
mad24
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum
)
;
#
else
sum
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum
)
;
...
...
@@ -177,25 +177,27 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
{
//
do
second
horizontal
filter
pass
//
and
calculate
final
result
sum
=
0.0f
;
sum
=
(
WT
)(
delta
)
;
for
(
i=0
; i<=2*RADIUSX; i++)
#
if
(
defined
(
INTEGER_ARITHMETIC
)
&&
!INTEL_DEVICE
)
#
if
defined
(
INTEGER_ARITHMETIC
)
sum
=
mad24
(
lsmemDy[liy][lix+i],
mat_kernelX[i],
sum
)
;
#
else
sum
=
mad
(
lsmemDy[liy][lix+i],
mat_kernelX[i],
sum
)
;
#
endif
#
if
def
INTEGER_ARITHMETIC
#
if
def
INTEL_DEVICE
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
/
(
1
<<
SHIFT_BITS
)
;
#
if
defined
(
SHIFT_BITS
)
&&
SHIFT_BITS
>
0
#
if
!defined
(
INTEGER_ARITHMETIC
)
sum
=
sum
*
(
1.0f
/
(
1
<<
SHIFT_BITS
)
)
;
#
else
sum
=
(
sum
+
(
1
<<
(
SHIFT_BITS-1
)))
>>
SHIFT_BITS
;
#
endif
#
endif
//
store
result
into
destination
image
storepix
(
convertToDstT
(
sum
+
(
WT
)(
delta
)
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
storepix
(
convertToDstT
(
sum
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
i
=
liy
*
BLK_X
+
lix
; i < (RADIUSY*2) * (BLK_X+(RADIUSX*2)); i += BLK_X * BLK_Y)
{
int
clocX
=
i
%
(
BLK_X+
(
RADIUSX*2
))
;
...
...
modules/imgproc/src/smooth.dispatch.cpp
浏览文件 @
44d473fb
...
...
@@ -48,6 +48,7 @@
#include <opencv2/core/utils/configuration.private.hpp>
#include <vector>
#include <iostream>
#include "opencv2/core/hal/intrin.hpp"
#include "opencl_kernels_imgproc.hpp"
...
...
@@ -637,10 +638,9 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return
;
}
bool
useOpenCL
=
(
ocl
::
isOpenCLActivated
()
&&
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
((
ksize
.
width
==
3
&&
ksize
.
height
==
3
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
))
&&
_src
.
rows
()
>
ksize
.
height
&&
_src
.
cols
()
>
ksize
.
width
);
bool
useOpenCL
=
ocl
::
isOpenCLActivated
()
&&
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
_src
.
rows
()
>=
ksize
.
height
&&
_src
.
cols
()
>=
ksize
.
width
&&
ksize
.
width
>
1
&&
ksize
.
height
>
1
;
CV_UNUSED
(
useOpenCL
);
int
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
...
...
@@ -648,27 +648,13 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
Mat
kx
,
ky
;
createGaussianKernels
(
kx
,
ky
,
type
,
ksize
,
sigma1
,
sigma2
);
CV_OCL_RUN
(
useOpenCL
,
ocl_GaussianBlur_8UC1
(
_src
,
_dst
,
ksize
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
borderType
));
CV_OCL_RUN
(
useOpenCL
&&
sdepth
==
CV_8U
&&
((
ksize
.
width
==
3
&&
ksize
.
height
==
3
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
)),
ocl_GaussianBlur_8UC1
(
_src
,
_dst
,
ksize
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
borderType
)
);
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
(
size_t
)
_src
.
rows
()
>
kx
.
total
()
&&
(
size_t
)
_src
.
cols
()
>
kx
.
total
(),
ocl_sepFilter2D
(
_src
,
_dst
,
sdepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
))
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
if
(
!
(
borderType
&
BORDER_ISOLATED
))
src
.
locateROI
(
wsz
,
ofs
);
CALL_HAL
(
gaussianBlur
,
cv_hal_gaussianBlur
,
src
.
ptr
(),
src
.
step
,
dst
.
ptr
(),
dst
.
step
,
src
.
cols
,
src
.
rows
,
sdepth
,
cn
,
ofs
.
x
,
ofs
.
y
,
wsz
.
width
-
src
.
cols
-
ofs
.
x
,
wsz
.
height
-
src
.
rows
-
ofs
.
y
,
ksize
.
width
,
ksize
.
height
,
sigma1
,
sigma2
,
borderType
&~
BORDER_ISOLATED
);
CV_OVX_RUN
(
true
,
openvx_gaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
))
if
(
sdepth
==
CV_8U
&&
((
borderType
&
BORDER_ISOLATED
)
||
!
_src
.
getMat
().
isSubmatrix
()))
if
(
sdepth
==
CV_8U
&&
((
borderType
&
BORDER_ISOLATED
)
||
!
_src
.
isSubmatrix
()))
{
std
::
vector
<
ufixedpoint16
>
fkx
,
fky
;
createGaussianKernels
(
fkx
,
fky
,
type
,
ksize
,
sigma1
,
sigma2
);
...
...
@@ -684,6 +670,17 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
}
else
{
CV_OCL_RUN
(
useOpenCL
,
ocl_sepFilter2D_BitExact
(
_src
,
_dst
,
sdepth
,
ksize
,
(
const
uint16_t
*
)
&
fkx
[
0
],
(
const
uint16_t
*
)
&
fky
[
0
],
Point
(
-
1
,
-
1
),
0
,
borderType
,
8
/*shift_bits*/
)
);
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
if
(
src
.
data
==
dst
.
data
)
src
=
src
.
clone
();
CV_CPU_DISPATCH
(
GaussianBlurFixedPoint
,
(
src
,
dst
,
(
const
uint16_t
*
)
&
fkx
[
0
],
(
int
)
fkx
.
size
(),
(
const
uint16_t
*
)
&
fky
[
0
],
(
int
)
fky
.
size
(),
borderType
),
...
...
@@ -692,6 +689,29 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
}
}
#ifdef HAVE_OPENCL
if
(
useOpenCL
)
{
sepFilter2D
(
_src
,
_dst
,
sdepth
,
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
);
return
;
}
#endif
Mat
src
=
_src
.
getMat
();
Mat
dst
=
_dst
.
getMat
();
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
if
(
!
(
borderType
&
BORDER_ISOLATED
))
src
.
locateROI
(
wsz
,
ofs
);
CALL_HAL
(
gaussianBlur
,
cv_hal_gaussianBlur
,
src
.
ptr
(),
src
.
step
,
dst
.
ptr
(),
dst
.
step
,
src
.
cols
,
src
.
rows
,
sdepth
,
cn
,
ofs
.
x
,
ofs
.
y
,
wsz
.
width
-
src
.
cols
-
ofs
.
x
,
wsz
.
height
-
src
.
rows
-
ofs
.
y
,
ksize
.
width
,
ksize
.
height
,
sigma1
,
sigma2
,
borderType
&~
BORDER_ISOLATED
);
CV_OVX_RUN
(
true
,
openvx_gaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
))
#if defined ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST
(
ipp_GaussianBlur
(
src
,
dst
,
ksize
,
sigma1
,
sigma2
,
borderType
));
...
...
modules/imgproc/test/ocl/test_sepfilter2d.cpp
浏览文件 @
44d473fb
...
...
@@ -73,7 +73,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
useRoi
=
GET_PARAM
(
4
);
}
void
random_roi
()
void
random_roi
(
bool
bitExact
)
{
Size
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
if
(
1
!=
ksize
.
width
%
2
)
...
...
@@ -81,11 +81,19 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
if
(
1
!=
ksize
.
height
%
2
)
ksize
.
height
++
;
Mat
temp
=
randomMat
(
Size
(
ksize
.
width
,
1
),
CV_
MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
Mat
temp
=
randomMat
(
Size
(
ksize
.
width
,
1
),
CV_
32FC1
,
-
0.5
,
1.0
);
cv
::
normalize
(
temp
,
kernelX
,
1.0
,
0.0
,
NORM_L1
);
temp
=
randomMat
(
Size
(
1
,
ksize
.
height
),
CV_MAKE_TYPE
(
CV_32F
,
1
),
-
MAX_VALUE
,
MAX_VALUE
);
temp
=
randomMat
(
Size
(
1
,
ksize
.
height
),
CV_32FC1
,
-
0.5
,
1.0
);
cv
::
normalize
(
temp
,
kernelY
,
1.0
,
0.0
,
NORM_L1
);
if
(
bitExact
)
{
kernelX
.
convertTo
(
temp
,
CV_32S
,
256
);
temp
.
convertTo
(
kernelX
,
CV_32F
,
1.0
/
256
);
kernelY
.
convertTo
(
temp
,
CV_32S
,
256
);
temp
.
convertTo
(
kernelY
,
CV_32F
,
1.0
/
256
);
}
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
Border
srcBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
...
...
@@ -96,6 +104,11 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
anchor
.
x
=
anchor
.
y
=
-
1
;
delta
=
randomDouble
(
-
100
,
100
);
if
(
bitExact
)
{
delta
=
(
int
)(
delta
*
256
)
/
256.0
;
}
UMAT_UPLOAD_INPUT_PARAMETER
(
src
);
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
);
}
...
...
@@ -110,7 +123,7 @@ OCL_TEST_P(SepFilter2D, Mat)
{
for
(
int
j
=
0
;
j
<
test_loop_times
+
3
;
j
++
)
{
random_roi
();
random_roi
(
false
);
OCL_OFF
(
cv
::
sepFilter2D
(
src_roi
,
dst_roi
,
-
1
,
kernelX
,
kernelY
,
anchor
,
delta
,
borderType
));
OCL_ON
(
cv
::
sepFilter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernelX
,
kernelY
,
anchor
,
delta
,
borderType
));
...
...
@@ -119,6 +132,22 @@ OCL_TEST_P(SepFilter2D, Mat)
}
}
OCL_TEST_P
(
SepFilter2D
,
Mat_BitExact
)
{
for
(
int
j
=
0
;
j
<
test_loop_times
+
3
;
j
++
)
{
random_roi
(
true
);
OCL_OFF
(
cv
::
sepFilter2D
(
src_roi
,
dst_roi
,
-
1
,
kernelX
,
kernelY
,
anchor
,
delta
,
borderType
));
OCL_ON
(
cv
::
sepFilter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernelX
,
kernelY
,
anchor
,
delta
,
borderType
));
if
(
src_roi
.
depth
()
<
CV_32F
)
Near
(
0.0
);
else
Near
(
1e-3
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
SepFilter2D
,
Combine
(
Values
(
CV_8U
,
CV_32F
),
...
...
modules/stitching/src/exposure_compensate.cpp
浏览文件 @
44d473fb
...
...
@@ -416,7 +416,11 @@ void BlocksCompensator::feed(const std::vector<Point> &corners, const std::vecto
bl_idx
+=
bl_per_img
.
width
*
bl_per_img
.
height
;
for
(
int
i
=
0
;
i
<
nr_gain_filtering_iterations_
;
++
i
)
sepFilter2D
(
gain_map
,
gain_map
,
CV_32F
,
ker
,
ker
);
{
UMat
tmp
;
sepFilter2D
(
gain_map
,
tmp
,
CV_32F
,
ker
,
ker
);
swap
(
gain_map
,
tmp
);
}
gain_maps_
[
img_idx
]
=
gain_map
;
}
...
...
platforms/winpack_dldt/2020.4/20200413-dldt-pdb.patch
0 → 100644
浏览文件 @
44d473fb
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f981ed2..90eb500a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,9 @@
endif()
project(OpenVINO)
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi /FS")
+set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
+
set(OpenVINO_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(IE_MAIN_SOURCE_DIR ${OpenVINO_MAIN_SOURCE_DIR}/inference-engine)
list(APPEND CMAKE_MODULE_PATH "${OpenVINO_MAIN_SOURCE_DIR}/cmake")
platforms/winpack_dldt/2020.4/20200604-dldt-disable-multidevice.patch
0 → 100644
浏览文件 @
44d473fb
diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt
index 0ba0dd78..7d34e7cb 100644
--- a/inference-engine/src/CMakeLists.txt
+++ b/inference-engine/src/CMakeLists.txt
@@ -26,7 +26,7 @@
endif()
add_subdirectory(hetero_plugin)
-add_subdirectory(multi_device)
+#add_subdirectory(multi_device)
add_subdirectory(transformations)
platforms/winpack_dldt/2020.4/20200701-dldt-disable-unused-targets.patch
0 → 100644
浏览文件 @
44d473fb
diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt
index 39ff413b..df4e89c7 100644
--- a/inference-engine/CMakeLists.txt
+++ b/inference-engine/CMakeLists.txt
@@ -66,7 +66,7 @@
if(ENABLE_TESTS)
add_subdirectory(tests)
endif()
-add_subdirectory(tools)
+#add_subdirectory(tools)
function(ie_build_samples)
# samples should be build with the same flags as from OpenVINO package,
@@ -85,7 +85,7 @@
endfunction()
# gflags and format_reader targets are kept inside of samples directory and
# they must be built even if samples build is disabled (required for tests and tools).
-ie_build_samples()
+#ie_build_samples()
file(GLOB_RECURSE SAMPLES_SOURCES samples/*.cpp samples/*.hpp samples/*.h)
add_cpplint_target(sample_cpplint
@@ -174,10 +174,10 @@
endif()
# Developer package
#
-ie_developer_export_targets(format_reader)
+#ie_developer_export_targets(format_reader)
ie_developer_export_targets(${NGRAPH_LIBRARIES})
-ie_developer_export()
+#ie_developer_export()
configure_file(
"${IE_MAIN_SOURCE_DIR}/cmake/developer_package_config.cmake.in"
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index 4ae0d560..e37acbe0 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -99,7 +99,7 @@
add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_sr
# Create common base object library
-add_library(${TARGET_NAME}_common_obj OBJECT
+add_library(${TARGET_NAME}_common_obj OBJECT EXCLUDE_FROM_ALL
${IE_BASE_SOURCE_FILES})
target_compile_definitions(${TARGET_NAME}_common_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
@@ -112,7 +112,7 @@
target_include_directories(${TARGET_NAME}_common_obj SYSTEM PRIVATE
# Create object library
-add_library(${TARGET_NAME}_obj OBJECT
+add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL
${LIBRARY_SRC}
${LIBRARY_HEADERS}
${PUBLIC_HEADERS})
@@ -177,7 +177,7 @@
ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# Static library used for unit tests which are always built
-add_library(${TARGET_NAME}_s STATIC
+add_library(${TARGET_NAME}_s STATIC EXCLUDE_FROM_ALL
$<TARGET_OBJECTS:${TARGET_NAME}_obj>
$<TARGET_OBJECTS:${TARGET_NAME}_common_obj>
$<TARGET_OBJECTS:${TARGET_NAME}_legacy_obj>
diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt
index 85524310..ed27e058 100644
--- a/inference-engine/src/legacy_api/CMakeLists.txt
+++ b/inference-engine/src/legacy_api/CMakeLists.txt
@@ -21,7 +21,7 @@
source_group("include" FILES ${PUBLIC_HEADERS})
# Create object library
-add_library(${TARGET_NAME}_obj OBJECT
+add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL
${LIBRARY_SRC}
${PUBLIC_HEADERS})
diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 297783da..06da35c3 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -192,7 +192,7 @@
cross_compiled_file(${TARGET_NAME}
# add test object library
-add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS})
+add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL ${SOURCES} ${HEADERS})
target_include_directories(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:inference_engine_preproc_s,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_lp_transformations,INTERFACE_INCLUDE_DIRECTORIES>
diff --git a/inference-engine/src/preprocessing/CMakeLists.txt b/inference-engine/src/preprocessing/CMakeLists.txt
index adc52f06..6b7d0ffe 100644
--- a/inference-engine/src/preprocessing/CMakeLists.txt
+++ b/inference-engine/src/preprocessing/CMakeLists.txt
@@ -124,7 +124,7 @@
endif()
# Create object library
-add_library(${TARGET_NAME}_obj OBJECT
+add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL
${LIBRARY_SRC}
${LIBRARY_HEADERS})
@@ -183,7 +183,7 @@
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}
# Static library used for unit tests which are always built
-add_library(${TARGET_NAME}_s STATIC
+add_library(${TARGET_NAME}_s STATIC EXCLUDE_FROM_ALL
$<TARGET_OBJECTS:${TARGET_NAME}_obj>)
set_ie_threading_interface_for(${TARGET_NAME}_s)
diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt
index 43e9308f..2e40dd31 100644
--- a/inference-engine/src/vpu/common/CMakeLists.txt
+++ b/inference-engine/src/vpu/common/CMakeLists.txt
@@ -55,7 +55,7 @@
add_common_target("vpu_common_lib" FALSE)
# Unit tests support for graph transformer
if(WIN32)
- add_common_target("vpu_common_lib_test_static" TRUE)
+ #add_common_target("vpu_common_lib_test_static" TRUE)
else()
add_library("vpu_common_lib_test_static" ALIAS "vpu_common_lib")
endif()
diff --git a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
index 982d3c7f..15fcf3e8 100644
--- a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
+++ b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt
@@ -64,7 +64,7 @@
add_graph_transformer_target("vpu_graph_transformer" FALSE)
# Unit tests support for graph transformer
if(WIN32)
- add_graph_transformer_target("vpu_graph_transformer_test_static" TRUE)
+ #add_graph_transformer_target("vpu_graph_transformer_test_static" TRUE)
else()
add_library("vpu_graph_transformer_test_static" ALIAS "vpu_graph_transformer")
endif()
diff --git a/inference-engine/thirdparty/CMakeLists.txt b/inference-engine/thirdparty/CMakeLists.txt
index f94453e0..c80e75c5 100644
--- a/inference-engine/thirdparty/CMakeLists.txt
+++ b/inference-engine/thirdparty/CMakeLists.txt
@@ -43,13 +43,13 @@
function(build_with_lto)
endfunction()
ie_build_pugixml()
- add_subdirectory(stb_lib)
+ #add_subdirectory(stb_lib)
add_subdirectory(ade)
add_subdirectory(fluid/modules/gapi)
target_include_directories(pugixml INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/pugixml/src>")
- set_target_properties(pugixml ade fluid stb_image
+ set_target_properties(pugixml ade fluid
PROPERTIES FOLDER thirdparty)
# developer package
diff --git a/inference-engine/thirdparty/pugixml/CMakeLists.txt b/inference-engine/thirdparty/pugixml/CMakeLists.txt
index 8bcb2801..380fb468 100644
--- a/inference-engine/thirdparty/pugixml/CMakeLists.txt
+++ b/inference-engine/thirdparty/pugixml/CMakeLists.txt
@@ -41,7 +41,7 @@
if(BUILD_SHARED_LIBS)
else()
add_library(pugixml STATIC ${SOURCES})
if (MSVC)
- add_library(pugixml_mt STATIC ${SOURCES})
+ #add_library(pugixml_mt STATIC ${SOURCES})
#if (WIN32)
# set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
platforms/winpack_dldt/2020.4/patch.config.py
0 → 100644
浏览文件 @
44d473fb
applyPatch
(
'20200701-dldt-disable-unused-targets.patch'
)
applyPatch
(
'20200413-dldt-pdb.patch'
)
applyPatch
(
'20200604-dldt-disable-multidevice.patch'
)
platforms/winpack_dldt/2020.4/sysroot.config.py
0 → 100644
浏览文件 @
44d473fb
sysroot_bin_dir
=
prepare_dir
(
self
.
sysrootdir
/
'bin'
)
copytree
(
self
.
build_dir
/
'install'
,
self
.
sysrootdir
/
'ngraph'
)
#rm_one(self.sysrootdir / 'ngraph' / 'lib' / 'ngraph.dll')
build_config
=
'Release'
if
not
self
.
config
.
build_debug
else
'Debug'
build_bin_dir
=
self
.
build_dir
/
'bin'
/
'intel64'
/
build_config
def
copy_bin
(
name
):
global
build_bin_dir
,
sysroot_bin_dir
copytree
(
build_bin_dir
/
name
,
sysroot_bin_dir
/
name
)
dll_suffix
=
'd'
if
self
.
config
.
build_debug
else
''
def
copy_dll
(
name
):
global
copy_bin
,
dll_suffix
copy_bin
(
name
+
dll_suffix
+
'.dll'
)
copy_bin
(
name
+
dll_suffix
+
'.pdb'
)
copy_bin
(
'cache.json'
)
copy_dll
(
'clDNNPlugin'
)
copy_dll
(
'HeteroPlugin'
)
copy_dll
(
'inference_engine'
)
copy_dll
(
'inference_engine_ir_reader'
)
copy_dll
(
'inference_engine_legacy'
)
copy_dll
(
'inference_engine_transformations'
)
# runtime
copy_dll
(
'inference_engine_lp_transformations'
)
# runtime
copy_dll
(
'MKLDNNPlugin'
)
# runtime
copy_dll
(
'myriadPlugin'
)
# runtime
#copy_dll('MultiDevicePlugin') # runtime, not used
copy_dll
(
'ngraph'
)
copy_bin
(
'plugins.xml'
)
copytree
(
self
.
build_dir
/
'bin'
/
'intel64'
/
'pcie-ma248x.elf'
,
sysroot_bin_dir
/
'pcie-ma248x.elf'
)
copytree
(
self
.
build_dir
/
'bin'
/
'intel64'
/
'usb-ma2x8x.mvcmd'
,
sysroot_bin_dir
/
'usb-ma2x8x.mvcmd'
)
copytree
(
self
.
build_dir
/
'bin'
/
'intel64'
/
'usb-ma2450.mvcmd'
,
sysroot_bin_dir
/
'usb-ma2450.mvcmd'
)
copytree
(
self
.
srcdir
/
'inference-engine'
/
'temp'
/
'tbb'
/
'bin'
,
sysroot_bin_dir
)
copytree
(
self
.
srcdir
/
'inference-engine'
/
'temp'
/
'tbb'
,
self
.
sysrootdir
/
'tbb'
)
sysroot_ie_dir
=
prepare_dir
(
self
.
sysrootdir
/
'deployment_tools'
/
'inference_engine'
)
sysroot_ie_lib_dir
=
prepare_dir
(
sysroot_ie_dir
/
'lib'
/
'intel64'
)
copytree
(
self
.
srcdir
/
'inference-engine'
/
'include'
,
sysroot_ie_dir
/
'include'
)
if
not
self
.
config
.
build_debug
:
copytree
(
self
.
build_dir
/
'install'
/
'lib'
/
'ngraph.lib'
,
sysroot_ie_lib_dir
/
'ngraph.lib'
)
copytree
(
build_bin_dir
/
'inference_engine.lib'
,
sysroot_ie_lib_dir
/
'inference_engine.lib'
)
copytree
(
build_bin_dir
/
'inference_engine_ir_reader.lib'
,
sysroot_ie_lib_dir
/
'inference_engine_ir_reader.lib'
)
copytree
(
build_bin_dir
/
'inference_engine_legacy.lib'
,
sysroot_ie_lib_dir
/
'inference_engine_legacy.lib'
)
else
:
copytree
(
self
.
build_dir
/
'install'
/
'lib'
/
'ngraphd.lib'
,
sysroot_ie_lib_dir
/
'ngraphd.lib'
)
copytree
(
build_bin_dir
/
'inference_engined.lib'
,
sysroot_ie_lib_dir
/
'inference_engined.lib'
)
copytree
(
build_bin_dir
/
'inference_engine_ir_readerd.lib'
,
sysroot_ie_lib_dir
/
'inference_engine_ir_readerd.lib'
)
copytree
(
build_bin_dir
/
'inference_engine_legacyd.lib'
,
sysroot_ie_lib_dir
/
'inference_engine_legacyd.lib'
)
sysroot_license_dir
=
prepare_dir
(
self
.
sysrootdir
/
'etc'
/
'licenses'
)
copytree
(
self
.
srcdir
/
'LICENSE'
,
sysroot_license_dir
/
'dldt-LICENSE'
)
copytree
(
self
.
srcdir
/
'ngraph/LICENSE'
,
sysroot_license_dir
/
'ngraph-LICENSE'
)
copytree
(
self
.
sysrootdir
/
'tbb/LICENSE'
,
sysroot_license_dir
/
'tbb-LICENSE'
)
platforms/winpack_dldt/build_package.py
浏览文件 @
44d473fb
...
...
@@ -133,9 +133,10 @@ def git_checkout(dst, url, branch, revision, clone_extra_args, noFetch=False):
([
'-b'
,
branch
]
if
branch
else
[])
+
clone_extra_args
+
[
url
,
'.'
],
cwd
=
dst
)
else
:
execute
(
cmd
=
[
'git'
,
'fetch'
,
'origin'
]
+
([
branch
]
if
branch
else
[]),
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'fetch'
,
'origin'
]
+
([
branch
+
':'
+
branch
]
if
branch
else
[]),
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'reset'
,
'--hard'
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'checkout'
,
'-B'
,
'winpack_dldt'
,
revision
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'clean'
,
'-f'
,
'-d'
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'checkout'
,
'--force'
,
'-B'
,
'winpack_dldt'
,
revision
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'clean'
,
'-f'
,
'-d'
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'submodule'
,
'init'
],
cwd
=
dst
)
execute
(
cmd
=
[
'git'
,
'submodule'
,
'update'
,
'--force'
,
'--depth=1000'
],
cwd
=
dst
)
...
...
@@ -149,6 +150,7 @@ def git_apply_patch(src_dir, patch_file):
patch_file
=
str
(
patch_file
)
# Python 3.5 may not handle Path
assert
os
.
path
.
exists
(
patch_file
),
patch_file
execute
(
cmd
=
[
'git'
,
'apply'
,
'--3way'
,
'-v'
,
'--ignore-space-change'
,
str
(
patch_file
)],
cwd
=
src_dir
)
execute
(
cmd
=
[
'git'
,
'diff'
,
'HEAD'
],
cwd
=
src_dir
)
#===================================================================================================
...
...
@@ -186,6 +188,17 @@ class BuilderDLDT:
self
.
build_dir
=
prepare_dir
(
self
.
outdir
/
'build'
,
clean
=
self
.
config
.
clean_dldt
)
self
.
sysrootdir
=
prepare_dir
(
self
.
outdir
/
'sysroot'
,
clean
=
self
.
config
.
clean_dldt
)
if
self
.
config
.
build_subst_drive
:
if
os
.
path
.
exists
(
self
.
config
.
build_subst_drive
+
':
\\
'
):
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
'/D'
])
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
str
(
self
.
outdir
)])
def
fix_path
(
p
):
return
str
(
p
).
replace
(
str
(
self
.
outdir
),
self
.
config
.
build_subst_drive
+
':'
)
self
.
srcdir
=
Path
(
fix_path
(
self
.
srcdir
))
self
.
build_dir
=
Path
(
fix_path
(
self
.
build_dir
))
self
.
sysrootdir
=
Path
(
fix_path
(
self
.
sysrootdir
))
def
init_patchset
(
self
):
cpath
=
self
.
cpath
self
.
patch_file
=
str
(
cpath
/
'patch.config.py'
)
# Python 3.5 may not handle Path
...
...
@@ -255,12 +268,14 @@ class BuilderDLDT:
BUILD_TESTS
=
'OFF'
,
ENABLE_OPENCV
=
'OFF'
,
ENABLE_GNA
=
'OFF'
,
ENABLE_SPEECH_DEMO
=
'OFF'
,
# 2020.4+
NGRAPH_DOC_BUILD_ENABLE
=
'OFF'
,
NGRAPH_UNIT_TEST_ENABLE
=
'OFF'
,
NGRAPH_UNIT_TEST_OPENVINO_ENABLE
=
'OFF'
,
NGRAPH_TEST_UTIL_ENABLE
=
'OFF'
,
NGRAPH_ONNX_IMPORT_ENABLE
=
'OFF'
,
CMAKE_INSTALL_PREFIX
=
str
(
self
.
build_dir
/
'install'
),
OUTPUT_ROOT
=
str
(
self
.
build_dir
),
# 2020.4+
)
cmd
+=
[
'-D%s=%s'
%
(
k
,
v
)
for
(
k
,
v
)
in
cmake_vars
.
items
()
if
v
is
not
None
]
...
...
@@ -270,14 +285,6 @@ class BuilderDLDT:
cmd
.
append
(
str
(
self
.
srcdir
))
build_dir
=
self
.
build_dir
if
self
.
config
.
build_subst_drive
:
if
os
.
path
.
exists
(
self
.
config
.
build_subst_drive
+
':
\\
'
):
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
'/D'
])
def
fix_path
(
p
):
return
str
(
p
).
replace
(
str
(
self
.
outdir
),
self
.
config
.
build_subst_drive
+
':'
)
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
str
(
self
.
outdir
)])
cmd
=
[
fix_path
(
c
)
for
c
in
cmd
]
build_dir
=
Path
(
fix_path
(
build_dir
))
try
:
execute
(
cmd
,
cwd
=
build_dir
)
...
...
@@ -291,8 +298,6 @@ class BuilderDLDT:
cmd
=
[
self
.
cmake_path
,
'-DBUILD_TYPE='
+
build_config
,
'-P'
,
'cmake_install.cmake'
]
execute
(
cmd
,
cwd
=
build_dir
/
'ngraph'
)
except
:
if
self
.
config
.
build_subst_drive
:
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
'/D'
])
raise
log
.
info
(
'DLDT build completed'
)
...
...
@@ -307,6 +312,11 @@ class BuilderDLDT:
log
.
info
(
'DLDT sysroot preparation completed'
)
def
cleanup
(
self
):
if
self
.
config
.
build_subst_drive
:
execute
([
'subst'
,
self
.
config
.
build_subst_drive
+
':'
,
'/D'
])
#===================================================================================================
class
Builder
:
...
...
@@ -466,7 +476,7 @@ def main():
parser
.
add_argument
(
'--dldt_reference_dir'
,
help
=
'DLDT reference git repository (optional)'
)
parser
.
add_argument
(
'--dldt_src_dir'
,
help
=
'DLDT custom source repository (skip git checkout and patching, use for TESTING only)'
)
parser
.
add_argument
(
'--dldt_config'
,
help
=
'Specify DLDT build configuration (defaults to
DLDT commit
)'
)
parser
.
add_argument
(
'--dldt_config'
,
help
=
'Specify DLDT build configuration (defaults to
evaluate from DLDT commit/branch
)'
)
args
=
parser
.
parse_args
()
...
...
@@ -492,7 +502,10 @@ def main():
args
.
opencv_dir
=
os
.
path
.
abspath
(
args
.
opencv_dir
)
if
not
args
.
dldt_config
:
args
.
dldt_config
=
args
.
dldt_src_commit
if
args
.
dldt_src_commit
==
'releases/2020/4'
or
args
.
dldt_src_branch
==
'releases/2020/4'
:
args
.
dldt_config
=
'2020.4'
else
:
args
.
dldt_config
=
args
.
dldt_src_commit
_opencv_dir
=
check_dir
(
args
.
opencv_dir
)
_outdir
=
prepare_dir
(
args
.
output_dir
)
...
...
@@ -504,14 +517,18 @@ def main():
builder_dldt
=
BuilderDLDT
(
args
)
builder_dldt
.
prepare_sources
()
builder_dldt
.
build
()
builder_dldt
.
make_sysroot
()
builder_opencv
=
Builder
(
args
)
builder_opencv
.
build
(
builder_dldt
)
builder_opencv
.
copy_sysroot
(
builder_dldt
)
builder_opencv
.
package_sources
()
try
:
builder_dldt
.
prepare_sources
()
builder_dldt
.
build
()
builder_dldt
.
make_sysroot
()
builder_opencv
=
Builder
(
args
)
builder_opencv
.
build
(
builder_dldt
)
builder_opencv
.
copy_sysroot
(
builder_dldt
)
builder_opencv
.
package_sources
()
except
:
builder_dldt
.
cleanup
()
raise
log
.
info
(
"====="
)
log
.
info
(
"===== Build finished"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录