Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
01324b02
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
01324b02
编写于
8月 26, 2020
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #18136 from nglee:dev_cudaEqualizeHistBitExact
上级
79272286
a7ffcaab
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
223 addition
and
31 deletion
+223
-31
modules/cudaimgproc/src/cuda/hist.cu
modules/cudaimgproc/src/cuda/hist.cu
+131
-13
modules/cudaimgproc/src/histogram.cpp
modules/cudaimgproc/src/histogram.cpp
+6
-10
modules/cudaimgproc/test/test_histogram.cpp
modules/cudaimgproc/test/test_histogram.cpp
+80
-2
modules/cudaoptflow/src/cuda/tvl1flow.cu
modules/cudaoptflow/src/cuda/tvl1flow.cu
+6
-6
未找到文件。
modules/cudaimgproc/src/cuda/hist.cu
浏览文件 @
01324b02
...
...
@@ -257,18 +257,15 @@ namespace hist
namespace
hist
{
__constant__
int
c_lut
[
256
];
struct
EqualizeHist
:
unary_function
<
uchar
,
uchar
>
{
float
scale
;
const
uchar
*
lut
;
__host__
EqualizeHist
(
float
_scale
)
:
scale
(
_scale
)
{}
__host__
EqualizeHist
(
const
uchar
*
_lut
)
:
lut
(
_lut
)
{}
__device__
__forceinline__
uchar
operator
()(
uchar
val
)
const
{
const
int
lut
=
c_lut
[
val
];
return
__float2int_rn
(
scale
*
lut
);
return
lut
[
val
];
}
};
}
...
...
@@ -283,16 +280,137 @@ namespace cv { namespace cuda { namespace device
namespace
hist
{
void
equalizeHist
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
const
int
*
lut
,
cudaStream_t
stream
)
void
equalizeHist
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
const
uchar
*
lut
,
cudaStream_t
stream
)
{
if
(
stream
==
0
)
cudaSafeCall
(
cudaMemcpyToSymbol
(
c_lut
,
lut
,
256
*
sizeof
(
int
),
0
,
cudaMemcpyDeviceToDevice
)
);
else
cudaSafeCall
(
cudaMemcpyToSymbolAsync
(
c_lut
,
lut
,
256
*
sizeof
(
int
),
0
,
cudaMemcpyDeviceToDevice
,
stream
)
);
device
::
transform
(
src
,
dst
,
EqualizeHist
(
lut
),
WithOutMask
(),
stream
);
}
__global__
void
buildLutKernel
(
int
*
hist
,
unsigned
char
*
lut
,
int
size
)
{
__shared__
int
warp_smem
[
8
];
__shared__
int
hist_smem
[
8
][
33
];
#define HIST_SMEM_NO_BANK_CONFLICT(idx) hist_smem[(idx) >> 5][(idx) & 31]
const
int
tId
=
threadIdx
.
x
;
const
int
warpId
=
threadIdx
.
x
/
32
;
const
int
laneId
=
threadIdx
.
x
%
32
;
// Step1 - Find minimum non-zero value in hist and make it zero
HIST_SMEM_NO_BANK_CONFLICT
(
tId
)
=
hist
[
tId
];
int
nonZeroIdx
=
HIST_SMEM_NO_BANK_CONFLICT
(
tId
)
>
0
?
tId
:
256
;
__syncthreads
();
for
(
int
delta
=
16
;
delta
>
0
;
delta
/=
2
)
{
#if __CUDACC_VER_MAJOR__ >= 9
int
shflVal
=
__shfl_down_sync
(
0xFFFFFFFF
,
nonZeroIdx
,
delta
);
#else
int
shflVal
=
__shfl_down
(
nonZeroIdx
,
delta
);
#endif
if
(
laneId
<
delta
)
nonZeroIdx
=
min
(
nonZeroIdx
,
shflVal
);
}
if
(
laneId
==
0
)
warp_smem
[
warpId
]
=
nonZeroIdx
;
const
float
scale
=
255.0
f
/
(
src
.
cols
*
src
.
rows
);
__syncthreads
();
if
(
tId
<
8
)
{
int
warpVal
=
warp_smem
[
tId
];
for
(
int
delta
=
4
;
delta
>
0
;
delta
/=
2
)
{
#if __CUDACC_VER_MAJOR__ >= 9
int
shflVal
=
__shfl_down_sync
(
0x000000FF
,
warpVal
,
delta
);
#else
int
shflVal
=
__shfl_down
(
warpVal
,
delta
);
#endif
if
(
tId
<
delta
)
warpVal
=
min
(
warpVal
,
shflVal
);
}
if
(
tId
==
0
)
{
warp_smem
[
0
]
=
warpVal
;
// warpVal - minimum index
}
}
__syncthreads
();
const
int
minNonZeroIdx
=
warp_smem
[
0
];
const
int
minNonZeroVal
=
HIST_SMEM_NO_BANK_CONFLICT
(
minNonZeroIdx
);
if
(
minNonZeroVal
==
size
)
{
// This is a special case: the whole image has the same color
lut
[
tId
]
=
0
;
if
(
tId
==
minNonZeroIdx
)
lut
[
tId
]
=
minNonZeroIdx
;
return
;
}
device
::
transform
(
src
,
dst
,
EqualizeHist
(
scale
),
WithOutMask
(),
stream
);
if
(
tId
==
0
)
HIST_SMEM_NO_BANK_CONFLICT
(
minNonZeroIdx
)
=
0
;
__syncthreads
();
// Step2 - Inclusive sum
// Algorithm from GPU Gems 3 (A Work-Efficient Parallel Scan)
// https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda
// Step2 Phase1 - The Up-Sweep Phase
for
(
int
delta
=
1
;
delta
<
256
;
delta
*=
2
)
{
if
(
tId
<
128
/
delta
)
{
int
idx
=
255
-
2
*
tId
*
delta
;
HIST_SMEM_NO_BANK_CONFLICT
(
idx
)
+=
HIST_SMEM_NO_BANK_CONFLICT
(
idx
-
delta
);
}
__syncthreads
();
}
// Step2 Phase2 - The Down-Sweep Phase
if
(
tId
==
0
)
HIST_SMEM_NO_BANK_CONFLICT
(
255
)
=
0
;
for
(
int
delta
=
128
;
delta
>=
1
;
delta
/=
2
)
{
if
(
tId
<
128
/
delta
)
{
int
rootIdx
=
255
-
tId
*
delta
*
2
;
int
leftIdx
=
rootIdx
-
delta
;
int
tmp
=
HIST_SMEM_NO_BANK_CONFLICT
(
leftIdx
);
HIST_SMEM_NO_BANK_CONFLICT
(
leftIdx
)
=
HIST_SMEM_NO_BANK_CONFLICT
(
rootIdx
);
HIST_SMEM_NO_BANK_CONFLICT
(
rootIdx
)
+=
tmp
;
}
__syncthreads
();
}
// Step2 Phase3 - Convert exclusive sum to inclusive sum
int
tmp
=
HIST_SMEM_NO_BANK_CONFLICT
(
tId
);
__syncthreads
();
if
(
tId
>=
1
)
HIST_SMEM_NO_BANK_CONFLICT
(
tId
-
1
)
=
tmp
;
if
(
tId
==
255
)
HIST_SMEM_NO_BANK_CONFLICT
(
tId
)
=
tmp
+
hist
[
tId
];
__syncthreads
();
// Step3 - Scale values to build lut
lut
[
tId
]
=
saturate_cast
<
unsigned
char
>
(
HIST_SMEM_NO_BANK_CONFLICT
(
tId
)
*
(
255.0
f
/
(
size
-
minNonZeroVal
)));
#undef HIST_SMEM_NO_BANK_CONFLICT
}
void
buildLut
(
PtrStepSzi
hist
,
PtrStepSzb
lut
,
int
size
,
cudaStream_t
stream
)
{
buildLutKernel
<<<
1
,
256
,
0
,
stream
>>>
(
hist
.
data
,
lut
.
data
,
size
);
cudaSafeCall
(
cudaGetLastError
()
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
}
...
...
modules/cudaimgproc/src/histogram.cpp
浏览文件 @
01324b02
...
...
@@ -102,7 +102,8 @@ void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, St
namespace
hist
{
void
equalizeHist
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
const
int
*
lut
,
cudaStream_t
stream
);
void
equalizeHist
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
const
uchar
*
lut
,
cudaStream_t
stream
);
void
buildLut
(
PtrStepSzi
hist
,
PtrStepSzb
lut
,
int
size
,
cudaStream_t
stream
);
}
void
cv
::
cuda
::
equalizeHist
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
_stream
)
...
...
@@ -114,26 +115,21 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
int
intBufSize
;
nppSafeCall
(
nppsIntegralGetBufferSize_32s
(
256
,
&
intBufSize
)
);
size_t
bufSize
=
intBufSize
+
2
*
256
*
sizeof
(
int
);
size_t
bufSize
=
256
*
sizeof
(
int
)
+
256
*
sizeof
(
uchar
);
BufferPool
pool
(
_stream
);
GpuMat
buf
=
pool
.
getBuffer
(
1
,
static_cast
<
int
>
(
bufSize
),
CV_8UC1
);
GpuMat
hist
(
1
,
256
,
CV_32SC1
,
buf
.
data
);
GpuMat
lut
(
1
,
256
,
CV_32SC1
,
buf
.
data
+
256
*
sizeof
(
int
));
GpuMat
intBuf
(
1
,
intBufSize
,
CV_8UC1
,
buf
.
data
+
2
*
256
*
sizeof
(
int
));
GpuMat
lut
(
1
,
256
,
CV_8UC1
,
buf
.
data
+
256
*
sizeof
(
int
));
cuda
::
calcHist
(
src
,
hist
,
_stream
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
NppStreamHandler
h
(
stream
);
nppSafeCall
(
nppsIntegral_32s
(
hist
.
ptr
<
Npp32s
>
(),
lut
.
ptr
<
Npp32s
>
(),
256
,
intBuf
.
ptr
<
Npp8u
>
())
);
hist
::
buildLut
(
hist
,
lut
,
src
.
rows
*
src
.
cols
,
stream
);
hist
::
equalizeHist
(
src
,
dst
,
lut
.
ptr
<
int
>
()
,
stream
);
hist
::
equalizeHist
(
src
,
dst
,
lut
.
data
,
stream
);
}
////////////////////////////////////////////////////////////////////////
...
...
modules/cudaimgproc/test/test_histogram.cpp
浏览文件 @
01324b02
...
...
@@ -208,7 +208,7 @@ CUDA_TEST_P(EqualizeHist, Async)
cv
::
Mat
dst_gold
;
cv
::
equalizeHist
(
src
,
dst_gold
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
3
.0
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0
.0
);
}
CUDA_TEST_P
(
EqualizeHist
,
Accuracy
)
...
...
@@ -221,13 +221,91 @@ CUDA_TEST_P(EqualizeHist, Accuracy)
cv
::
Mat
dst_gold
;
cv
::
equalizeHist
(
src
,
dst_gold
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
3
.0
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0
.0
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_ImgProc
,
EqualizeHist
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
));
TEST
(
EqualizeHistIssue
,
Issue18035
)
{
std
::
vector
<
std
::
string
>
imgPaths
;
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/3MP.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/5MP.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/airplane.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/baboon.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/box.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/box_in_scene.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/fruits.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/fruits_ecc.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/graffiti.png"
);
imgPaths
.
push_back
(
std
::
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"../cv/shared/lena.png"
);
for
(
size_t
i
=
0
;
i
<
imgPaths
.
size
();
++
i
)
{
std
::
string
imgPath
=
imgPaths
[
i
];
cv
::
Mat
src
=
cv
::
imread
(
imgPath
,
cv
::
IMREAD_GRAYSCALE
);
src
=
src
/
30
;
cv
::
cuda
::
GpuMat
d_src
,
dst
;
d_src
.
upload
(
src
);
cv
::
cuda
::
equalizeHist
(
d_src
,
dst
);
cv
::
Mat
dst_gold
;
cv
::
equalizeHist
(
src
,
dst_gold
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0.0
);
}
}
PARAM_TEST_CASE
(
EqualizeHistExtreme
,
cv
::
cuda
::
DeviceInfo
,
cv
::
Size
,
int
)
{
cv
::
cuda
::
DeviceInfo
devInfo
;
cv
::
Size
size
;
int
val
;
virtual
void
SetUp
()
{
devInfo
=
GET_PARAM
(
0
);
size
=
GET_PARAM
(
1
);
val
=
GET_PARAM
(
2
);
cv
::
cuda
::
setDevice
(
devInfo
.
deviceID
());
}
};
CUDA_TEST_P
(
EqualizeHistExtreme
,
Case1
)
{
cv
::
Mat
src
(
size
,
CV_8UC1
,
val
);
cv
::
cuda
::
GpuMat
dst
;
cv
::
cuda
::
equalizeHist
(
loadMat
(
src
),
dst
);
cv
::
Mat
dst_gold
;
cv
::
equalizeHist
(
src
,
dst_gold
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0.0
);
}
CUDA_TEST_P
(
EqualizeHistExtreme
,
Case2
)
{
cv
::
Mat
src
=
randomMat
(
size
,
CV_8UC1
,
val
);
cv
::
cuda
::
GpuMat
dst
;
cv
::
cuda
::
equalizeHist
(
loadMat
(
src
),
dst
);
cv
::
Mat
dst_gold
;
cv
::
equalizeHist
(
src
,
dst_gold
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0.0
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_ImgProc
,
EqualizeHistExtreme
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
testing
::
Range
(
0
,
256
)));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// CLAHE
...
...
modules/cudaoptflow/src/cuda/tvl1flow.cu
浏览文件 @
01324b02
...
...
@@ -116,15 +116,15 @@ namespace tvl1flow
texture
<
float
,
cudaTextureType2D
,
cudaReadModeElementType
>
tex_I1y
(
false
,
cudaFilterModePoint
,
cudaAddressModeClamp
);
struct
SrcTexRef
:
SrcTex
{
__device__
__forceinline__
float
I1
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
(
tex_I1
,
x
,
y
);
}
__device__
__forceinline__
float
I1x
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1x
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
(
tex_I1x
,
x
,
y
);
}
__device__
__forceinline__
float
I1y
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1y
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
(
tex_I1y
,
x
,
y
);
}
...
...
@@ -135,15 +135,15 @@ namespace tvl1flow
__host__
SrcTexObj
(
cudaTextureObject_t
tex_obj_I1_
,
cudaTextureObject_t
tex_obj_I1x_
,
cudaTextureObject_t
tex_obj_I1y_
)
:
tex_obj_I1
(
tex_obj_I1_
),
tex_obj_I1x
(
tex_obj_I1x_
),
tex_obj_I1y
(
tex_obj_I1y_
)
{}
__device__
__forceinline__
float
I1
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
<
float
>
(
tex_obj_I1
,
x
,
y
);
}
__device__
__forceinline__
float
I1x
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1x
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
<
float
>
(
tex_obj_I1x
,
x
,
y
);
}
__device__
__forceinline__
float
I1y
(
float
x
,
float
y
)
const
override
__device__
__forceinline__
float
I1y
(
float
x
,
float
y
)
const
CV_OVERRIDE
{
return
tex2D
<
float
>
(
tex_obj_I1y
,
x
,
y
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录