Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
3bba5b5a
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3bba5b5a
编写于
2月 18, 2019
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13850 from nglee:dev_FixCudaStereoBMHangRace
上级
9b71f5fd
1a961660
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
72 additions
and
35 deletions
+72
-35
modules/cudastereo/src/cuda/stereobm.cu
modules/cudastereo/src/cuda/stereobm.cu
+72
-35
未找到文件。
modules/cudastereo/src/cuda/stereobm.cu
浏览文件 @
3bba5b5a
...
@@ -71,48 +71,54 @@ namespace cv { namespace cuda { namespace device
...
@@ -71,48 +71,54 @@ namespace cv { namespace cuda { namespace device
}
}
template
<
int
RADIUS
>
template
<
int
RADIUS
>
__device__
unsigned
int
CalcSSD
(
volatile
unsigned
int
*
col_ssd_cache
,
volatile
unsigned
int
*
col_ssd
)
__device__
unsigned
int
CalcSSD
(
volatile
unsigned
int
*
col_ssd_cache
,
volatile
unsigned
int
*
col_ssd
,
const
int
X
)
{
{
unsigned
int
cache
=
0
;
unsigned
int
cache
=
0
;
unsigned
int
cache2
=
0
;
unsigned
int
cache2
=
0
;
for
(
int
i
=
1
;
i
<=
RADIUS
;
i
++
)
if
(
X
<
cwidth
-
RADIUS
)
cache
+=
col_ssd
[
i
];
{
for
(
int
i
=
1
;
i
<=
RADIUS
;
i
++
)
cache
+=
col_ssd
[
i
];
col_ssd_cache
[
0
]
=
cache
;
col_ssd_cache
[
0
]
=
cache
;
}
__syncthreads
();
__syncthreads
();
if
(
threadIdx
.
x
<
BLOCK_W
-
RADIUS
)
if
(
X
<
cwidth
-
RADIUS
)
cache2
=
col_ssd_cache
[
RADIUS
];
{
else
if
(
threadIdx
.
x
<
BLOCK_W
-
RADIUS
)
for
(
int
i
=
RADIUS
+
1
;
i
<
(
2
*
RADIUS
+
1
);
i
++
)
cache2
=
col_ssd_cache
[
RADIUS
];
cache2
+=
col_ssd
[
i
];
else
for
(
int
i
=
RADIUS
+
1
;
i
<
(
2
*
RADIUS
+
1
);
i
++
)
cache2
+=
col_ssd
[
i
];
}
return
col_ssd
[
0
]
+
cache
+
cache2
;
return
col_ssd
[
0
]
+
cache
+
cache2
;
}
}
template
<
int
RADIUS
>
template
<
int
RADIUS
>
__device__
uint2
MinSSD
(
volatile
unsigned
int
*
col_ssd_cache
,
volatile
unsigned
int
*
col_ssd
)
__device__
uint2
MinSSD
(
volatile
unsigned
int
*
col_ssd_cache
,
volatile
unsigned
int
*
col_ssd
,
const
int
X
)
{
{
unsigned
int
ssd
[
N_DISPARITIES
];
unsigned
int
ssd
[
N_DISPARITIES
];
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
ssd
[
0
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
0
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
0
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
0
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
1
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
1
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
1
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
1
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
2
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
2
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
2
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
2
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
3
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
3
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
3
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
3
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
4
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
4
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
4
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
4
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
5
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
5
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
5
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
5
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
6
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
6
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
6
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
6
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
__syncthreads
();
__syncthreads
();
ssd
[
7
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
7
*
(
BLOCK_W
+
2
*
RADIUS
));
ssd
[
7
]
=
CalcSSD
<
RADIUS
>
(
col_ssd_cache
,
col_ssd
+
7
*
(
BLOCK_W
+
2
*
RADIUS
)
,
X
);
int
mssd
=
::
min
(
::
min
(
::
min
(
ssd
[
0
],
ssd
[
1
]),
::
min
(
ssd
[
4
],
ssd
[
5
])),
::
min
(
::
min
(
ssd
[
2
],
ssd
[
3
]),
::
min
(
ssd
[
6
],
ssd
[
7
])));
int
mssd
=
::
min
(
::
min
(
::
min
(
ssd
[
0
],
ssd
[
1
]),
::
min
(
ssd
[
4
],
ssd
[
5
])),
::
min
(
::
min
(
ssd
[
2
],
ssd
[
3
]),
::
min
(
ssd
[
6
],
ssd
[
7
])));
...
@@ -243,12 +249,12 @@ namespace cv { namespace cuda { namespace device
...
@@ -243,12 +249,12 @@ namespace cv { namespace cuda { namespace device
unsigned
int
*
minSSDImage
=
cminSSDImage
+
X
+
Y
*
cminSSD_step
;
unsigned
int
*
minSSDImage
=
cminSSDImage
+
X
+
Y
*
cminSSD_step
;
unsigned
char
*
disparImage
=
disp
.
data
+
X
+
Y
*
disp
.
step
;
unsigned
char
*
disparImage
=
disp
.
data
+
X
+
Y
*
disp
.
step
;
/*
if (X < cwidth)
//
if (X < cwidth)
{
//
{
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
//
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
//
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
*ptr = 0xFFFFFFFF;
//
*ptr = 0xFFFFFFFF;
}*/
//}
int
end_row
=
::
min
(
ROWSperTHREAD
,
cheight
-
Y
-
RADIUS
);
int
end_row
=
::
min
(
ROWSperTHREAD
,
cheight
-
Y
-
RADIUS
);
int
y_tex
;
int
y_tex
;
int
x_tex
=
X
-
RADIUS
;
int
x_tex
=
X
-
RADIUS
;
...
@@ -268,13 +274,27 @@ namespace cv { namespace cuda { namespace device
...
@@ -268,13 +274,27 @@ namespace cv { namespace cuda { namespace device
__syncthreads
();
//before MinSSD function
__syncthreads
();
//before MinSSD function
if
(
X
<
cwidth
-
RADIUS
&&
Y
<
cheight
-
RADIUS
)
if
(
Y
<
cheight
-
RADIUS
)
{
{
uint2
minSSD
=
MinSSD
<
RADIUS
>
(
col_ssd_cache
+
threadIdx
.
x
,
col_ssd
);
uint2
minSSD
=
MinSSD
<
RADIUS
>
(
col_ssd_cache
+
threadIdx
.
x
,
col_ssd
,
X
);
if
(
minSSD
.
x
<
minSSDImage
[
0
])
// For threads that do not satisfy the if condition below ("X < cwidth - RADIUS"), the previously
// computed "minSSD" value, which is the result of the "MinSSD" function call, is not used at all.
//
// However, since the "MinSSD" function has a "__syncthreads" call in its body, those threads
// must also call "MinSSD" to avoid deadlock. (#13850)
//
// Since CUDA 9, using "__syncwarp" with a proper mask value instead of "__syncthreads"
// could be an option, but the shared memory access pattern does not allow it, because it
// results in a race condition. (Checked via "cuda-memcheck --tool racecheck")
if
(
X
<
cwidth
-
RADIUS
)
{
{
disparImage
[
0
]
=
(
unsigned
char
)(
d
+
minSSD
.
y
);
if
(
minSSD
.
x
<
minSSDImage
[
0
])
minSSDImage
[
0
]
=
minSSD
.
x
;
{
disparImage
[
0
]
=
(
unsigned
char
)(
d
+
minSSD
.
y
);
minSSDImage
[
0
]
=
minSSD
.
x
;
}
}
}
}
}
...
@@ -295,17 +315,34 @@ namespace cv { namespace cuda { namespace device
...
@@ -295,17 +315,34 @@ namespace cv { namespace cuda { namespace device
__syncthreads
();
//before MinSSD function
__syncthreads
();
//before MinSSD function
if
(
X
<
cwidth
-
RADIUS
&&
row
<
cheight
-
RADIUS
-
Y
)
if
(
row
<
cheight
-
RADIUS
-
Y
)
{
{
int
idx
=
row
*
cminSSD_step
;
uint2
minSSD
=
MinSSD
<
RADIUS
>
(
col_ssd_cache
+
threadIdx
.
x
,
col_ssd
,
X
);
uint2
minSSD
=
MinSSD
<
RADIUS
>
(
col_ssd_cache
+
threadIdx
.
x
,
col_ssd
);
if
(
minSSD
.
x
<
minSSDImage
[
idx
])
// For threads that do not satisfy the if condition below ("X < cwidth - RADIUS"), the previously
// computed "minSSD" value, which is the result of the "MinSSD" function call, is not used at all.
//
// However, since the "MinSSD" function has a "__syncthreads" call in its body, those threads
// must also call "MinSSD" to avoid deadlock. (#13850)
//
// Since CUDA 9, using "__syncwarp" with a proper mask value instead of "__syncthreads"
// could be an option, but the shared memory access pattern does not allow it, because it
// results in a race condition. (Checked via "cuda-memcheck --tool racecheck")
if
(
X
<
cwidth
-
RADIUS
)
{
{
disparImage
[
disp
.
step
*
row
]
=
(
unsigned
char
)(
d
+
minSSD
.
y
);
int
idx
=
row
*
cminSSD_step
;
minSSDImage
[
idx
]
=
minSSD
.
x
;
if
(
minSSD
.
x
<
minSSDImage
[
idx
])
{
disparImage
[
disp
.
step
*
row
]
=
(
unsigned
char
)(
d
+
minSSD
.
y
);
minSSDImage
[
idx
]
=
minSSD
.
x
;
}
}
}
}
}
}
// for row loop
}
// for row loop
__syncthreads
();
// before initializing shared memory at the beginning of next loop
}
// for d loop
}
// for d loop
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录