Commit 0ddd16cf

Authored Nov 12, 2012 by Vladislav Vinogradov

calcHist & equalizeHist

Parent: 7e57648e

Showing 4 changed files with 91 additions and 193 deletions (+91 −193):

    modules/gpu/include/opencv2/gpu/gpu.hpp    +0   −2
    modules/gpu/perf/perf_imgproc.cpp          +2   −3
    modules/gpu/src/cuda/hist.cu               +80  −147
    modules/gpu/src/imgproc.cpp                +9   −41

In the hunks below, lines starting with "-" were removed and lines starting with "+" were added; other lines are unchanged context.
modules/gpu/include/opencv2/gpu/gpu.hpp

@@ -1028,11 +1028,9 @@ CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels
 //! Calculates histogram for 8u one channel image
 //! Output hist will have one row, 256 cols and CV32SC1 type.
 CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null());
-CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());

 //! normalizes the grayscale image brightness and contrast by normalizing its histogram
 CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
-CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null());
 CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null());

 //////////////////////////////// StereoBM_GPU ////////////////////////////////
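With the buffered calcHist overload gone, a caller supplies only the source image and receives the 1x256 CV_32SC1 histogram. A minimal host-side sketch of the public API after this change, assuming an OpenCV 2.4-era build of the gpu module and a hypothetical input file name:

// Minimal usage sketch (error handling mostly omitted; "input.png" is hypothetical).
#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    if (img.empty())
        return 1;

    cv::gpu::GpuMat d_src(img), d_hist, d_dst;

    cv::gpu::calcHist(d_src, d_hist);     // d_hist: 1 x 256, CV_32SC1
    cv::gpu::equalizeHist(d_src, d_dst);  // allocates its own hist/buf internally

    cv::Mat hist, dst;
    d_hist.download(hist);
    d_dst.download(dst);
    return 0;
}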
modules/gpu/perf/perf_imgproc.cpp

@@ -581,13 +581,12 @@ PERF_TEST_P(Sz, ImgProc_CalcHist, GPU_TYPICAL_MAT_SIZES)
 {
     cv::gpu::GpuMat d_src(src);
     cv::gpu::GpuMat d_hist;
-    cv::gpu::GpuMat d_buf;

-    cv::gpu::calcHist(d_src, d_hist, d_buf);
+    cv::gpu::calcHist(d_src, d_hist);

     TEST_CYCLE()
     {
-        cv::gpu::calcHist(d_src, d_hist, d_buf);
+        cv::gpu::calcHist(d_src, d_hist);
     }

     GPU_SANITY_CHECK(d_hist);
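The perf test above exercises calcHist through OpenCV's own perf framework (TEST_CYCLE, GPU_SANITY_CHECK). For a quick standalone measurement outside that framework, a CUDA-event timing sketch along the following lines could be used; it is an illustration, not part of this commit:

// Standalone timing sketch (not the OpenCV perf framework): times one
// cv::gpu::calcHist call with CUDA events on a CV_8UC1 source image.
#include <cstdio>
#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>

static void timeCalcHist(const cv::Mat& src)
{
    cv::gpu::GpuMat d_src(src), d_hist;
    cv::gpu::calcHist(d_src, d_hist);            // warm-up / first allocation

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);
    cv::gpu::calcHist(d_src, d_hist);
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);

    float ms = 0.0f;
    cudaEventElapsedTime(&ms, start, stop);
    std::printf("calcHist: %.3f ms for %dx%d\n", ms, src.cols, src.rows);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
}

int main()
{
    cv::Mat src(1080, 1920, CV_8UC1);
    cv::randu(src, cv::Scalar::all(0), cv::Scalar::all(256));
    timeCalcHist(src);
    return 0;
}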
modules/gpu/src/cuda/hist.cu

@@ -43,182 +43,115 @@
 #if !defined CUDA_DISABLER

-#include "internal_shared.hpp"
-#include "opencv2/gpu/device/utility.hpp"
-#include "opencv2/gpu/device/saturate_cast.hpp"
-
-namespace cv { namespace gpu { namespace device
-{
-    #define UINT_BITS 32U
-
-    //Warps == subhistograms per threadblock
-    #define WARP_COUNT 6
-
-    //Threadblock size
-    #define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
-    #define HISTOGRAM256_BIN_COUNT 256
-
-    //Shared memory per threadblock
-    #define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)
-
-    #define PARTIAL_HISTOGRAM256_COUNT 240
-
-    #define MERGE_THREADBLOCK_SIZE 256
-
-    #define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))
-
-    namespace hist
-    {
-        #if (!USE_SMEM_ATOMICS)
-
-            #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
-
-            __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
-            {
-                uint count;
-                do
-                {
-                    count = s_WarpHist[data] & TAG_MASK;
-                    count = threadTag | (count + 1);
-                    s_WarpHist[data] = count;
-                } while (s_WarpHist[data] != count);
-            }
-
-        #else
-
-            #define TAG_MASK 0xFFFFFFFFU
-
-            __forceinline__ __device__ void addByte(uint* s_WarpHist, uint data, uint threadTag)
-            {
-                atomicAdd(s_WarpHist + data, 1);
-            }
-
-        #endif
-
-        __forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
-        {
-            uint x = pos_x << 2;
-
-            if (x + 0 < cols) addByte(s_WarpHist, (data >>  0) & 0xFFU, tag);
-            if (x + 1 < cols) addByte(s_WarpHist, (data >>  8) & 0xFFU, tag);
-            if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
-            if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
-        }
-
-        __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
-        {
-            //Per-warp subhistogram storage
-            __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
-            uint* s_WarpHist = s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
-
-            //Clear shared memory storage for current threadblock before processing
-            #pragma unroll
-            for (uint i = 0; i < (HISTOGRAM256_THREADBLOCK_MEMORY / HISTOGRAM256_THREADBLOCK_SIZE); i++)
-                s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;
-
-            //Cycle through the entire data set, update subhistograms for each warp
-            const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);
-
-            __syncthreads();
-            const uint colsui = d_Data.step / sizeof(uint);
-            for (uint pos = blockIdx.x * blockDim.x + threadIdx.x; pos < dataCount; pos += blockDim.x * gridDim.x)
-            {
-                uint pos_y = pos / colsui;
-                uint pos_x = pos % colsui;
-                uint data = d_Data.ptr(pos_y)[pos_x];
-                addWord(s_WarpHist, data, tag, pos_x, cols);
-            }
-
-            //Merge per-warp histograms into per-block and write to global memory
-            __syncthreads();
-            for (uint bin = threadIdx.x; bin < HISTOGRAM256_BIN_COUNT; bin += HISTOGRAM256_THREADBLOCK_SIZE)
-            {
-                uint sum = 0;
-
-                for (uint i = 0; i < WARP_COUNT; i++)
-                    sum += s_Hist[bin + i * HISTOGRAM256_BIN_COUNT] & TAG_MASK;
-
-                d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
-            }
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////
-        // Merge histogram256() output
-        // Run one threadblock per bin; each threadblock adds up the same bin counter
-        // from every partial histogram. Reads are uncoalesced, but mergeHistogram256
-        // takes only a fraction of total processing time
-        ////////////////////////////////////////////////////////////////////////////////
-
-        __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
-        {
-            uint sum = 0;
-
-            #pragma unroll
-            for (uint i = threadIdx.x; i < PARTIAL_HISTOGRAM256_COUNT; i += MERGE_THREADBLOCK_SIZE)
-                sum += d_PartialHistograms[blockIdx.x + i * HISTOGRAM256_BIN_COUNT];
-
-            __shared__ uint data[MERGE_THREADBLOCK_SIZE];
-            data[threadIdx.x] = sum;
-
-            for (uint stride = MERGE_THREADBLOCK_SIZE / 2; stride > 0; stride >>= 1)
-            {
-                __syncthreads();
-
-                if (threadIdx.x < stride)
-                    data[threadIdx.x] += data[threadIdx.x + stride];
-            }
-
-            if (threadIdx.x == 0)
-                d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
-        }
-
-        void histogram256_gpu(PtrStepSzb src, int* hist, uint* buf, cudaStream_t stream)
-        {
-            histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(PtrStepSz<uint>(src), buf, static_cast<uint>(src.rows * src.step / sizeof(uint)), src.cols);
-            cudaSafeCall( cudaGetLastError() );
-
-            mergeHistogram256<<<HISTOGRAM256_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(buf, hist);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        /////////////////////////////////////////////////////////////////////////
-
-        __constant__ int c_lut[256];
-
-        __global__ void equalizeHist(const PtrStepSzb src, PtrStepb dst)
-        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x < src.cols && y < src.rows)
-            {
-                const uchar val = src.ptr(y)[x];
-                const int lut = c_lut[val];
-                dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
-            }
-        }
-
-        void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
-        {
-            dim3 block(16, 16);
-            dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
-
-            if (stream == 0)
-                cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
-            else
-                cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
-
-            equalizeHist<<<grid, block, 0, stream>>>(src, dst);
-            cudaSafeCall( cudaGetLastError() );
-
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
-        }
-    } // namespace hist
-}}} // namespace cv { namespace gpu { namespace device
-
-#endif /* CUDA_DISABLER */
\ No newline at end of file
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/functional.hpp"
+#include "opencv2/gpu/device/emulation.hpp"
+#include "opencv2/gpu/device/transform.hpp"
+
+using namespace cv::gpu;
+using namespace cv::gpu::device;
+
+namespace
+{
+    __global__ void histogram256(const uchar* src, int cols, int rows, size_t step, int* hist)
+    {
+        __shared__ int shist[256];
+
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+
+        shist[tid] = 0;
+        __syncthreads();
+
+        if (y < rows)
+        {
+            const unsigned int* rowPtr = (const unsigned int*) (src + y * step);
+
+            const int cols_4 = cols / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
+            {
+                unsigned int data = rowPtr[x];
+
+                Emulation::smem::atomicAdd(&shist[(data >>  0) & 0xFFU], 1);
+                Emulation::smem::atomicAdd(&shist[(data >>  8) & 0xFFU], 1);
+                Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
+                Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
+            }
+
+            if (cols % 4 != 0 && threadIdx.x == 0)
+            {
+                for (int x = cols_4 * 4; x < cols; ++x)
+                {
+                    unsigned int data = ((const uchar*)rowPtr)[x];
+                    Emulation::smem::atomicAdd(&shist[data], 1);
+                }
+            }
+        }
+
+        __syncthreads();
+
+        const int histVal = shist[tid];
+        if (histVal > 0)
+            ::atomicAdd(hist + tid, histVal);
+    }
+}
+
+namespace hist
+{
+    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(src.rows, block.y));
+
+        ::histogram256<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////
+
+namespace
+{
+    __constant__ int c_lut[256];
+
+    struct EqualizeHist : unary_function<uchar, uchar>
+    {
+        float scale;
+
+        __host__ EqualizeHist(float _scale) : scale(_scale) {}
+
+        __device__ __forceinline__ uchar operator ()(uchar val) const
+        {
+            const int lut = c_lut[val];
+            return __float2int_rn(scale * lut);
+        }
+    };
+}
+
+namespace cv { namespace gpu { namespace device
+{
+    template <> struct TransformFunctorTraits<EqualizeHist> : DefaultTransformFunctorTraits<EqualizeHist>
+    {
+        enum { smart_shift = 4 };
+    };
+}}}
+
+namespace hist
+{
+    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
+    {
+        if (stream == 0)
+            cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
+        else
+            cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
+
+        const float scale = 255.0f / (src.cols * src.rows);
+
+        transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
+    }
+}
+
+#endif /* CUDA_DISABLER */
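The heart of the new hist.cu is the per-block shared-memory histogram pattern: each block accumulates a private 256-bin histogram in shared memory (through Emulation::smem::atomicAdd, which falls back to software emulation on hardware without shared-memory atomics), and only bins that were actually touched are flushed to the global result with one global atomicAdd each. Below is a stripped-down, self-contained sketch of that pattern, using hypothetical names rather than OpenCV's and assuming a device with native shared-memory atomics and a 256-thread block:

// Illustrative per-block shared-memory histogram (hypothetical names, not OpenCV code).
#include <cstdio>
#include <cuda_runtime.h>

__global__ void blockHistogram256(const unsigned char* data, int n, int* hist)
{
    __shared__ int smem[256];
    const int tid = threadIdx.x;            // launch with blockDim.x == 256

    smem[tid] = 0;                           // each thread owns one bin
    __syncthreads();

    // Grid-stride loop: contention stays inside shared memory.
    for (int i = blockIdx.x * blockDim.x + tid; i < n; i += blockDim.x * gridDim.x)
        atomicAdd(&smem[data[i]], 1);

    __syncthreads();

    // One global atomic per (block, bin), only for non-empty bins.
    if (smem[tid] > 0)
        atomicAdd(&hist[tid], smem[tid]);
}

int main()
{
    const int n = 1 << 20;
    unsigned char* d_data; int* d_hist;
    cudaMalloc(&d_data, n);
    cudaMalloc(&d_hist, 256 * sizeof(int));
    cudaMemset(d_data, 7, n);                // every "pixel" has value 7
    cudaMemset(d_hist, 0, 256 * sizeof(int));

    blockHistogram256<<<120, 256>>>(d_data, n, d_hist);

    int h_hist[256];
    cudaMemcpy(h_hist, d_hist, sizeof(h_hist), cudaMemcpyDeviceToHost);
    std::printf("bin 7 = %d (expected %d)\n", h_hist[7], n);

    cudaFree(d_data);
    cudaFree(d_hist);
    return 0;
}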
modules/gpu/src/imgproc.cpp

@@ -71,9 +71,7 @@ void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&, GpuMat&, Stream&)
 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, Stream&) { throw_nogpu(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::calcHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::calcHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
-void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::equalizeHist(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }
 void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }

@@ -989,36 +987,20 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
 }

-namespace cv { namespace gpu { namespace device
-{
-    namespace hist
-    {
-        void histogram256_gpu(PtrStepSzb src, int* hist, unsigned int* buf, cudaStream_t stream);
-
-        const int PARTIAL_HISTOGRAM256_COUNT = 240;
-        const int HISTOGRAM256_BIN_COUNT     = 256;
-
-        void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
-    }
-}}}
-
-void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
-{
-    GpuMat buf;
-    calcHist(src, hist, buf, stream);
-}
-
-void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)
+namespace hist
 {
-    using namespace ::cv::gpu::device::hist;
-
+    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
+    void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
+}
+
+void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
+{
     CV_Assert(src.type() == CV_8UC1);

     hist.create(1, 256, CV_32SC1);
+    hist.setTo(Scalar::all(0));

-    ensureSizeIsEnough(1, PARTIAL_HISTOGRAM256_COUNT * HISTOGRAM256_BIN_COUNT, CV_32SC1, buf);
-
-    histogram256_gpu(src, hist.ptr<int>(), buf.ptr<unsigned int>(), StreamAccessor::getStream(stream));
+    hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
 }

 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)

@@ -1028,16 +1010,8 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream)
     equalizeHist(src, dst, hist, buf, stream);
 }

-void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream)
-{
-    GpuMat buf;
-    equalizeHist(src, dst, hist, buf, stream);
-}
-
 void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
 {
-    using namespace ::cv::gpu::device::hist;
-
     CV_Assert(src.type() == CV_8UC1);

     dst.create(src.size(), src.type());

@@ -1045,15 +1019,12 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&
     int intBufSize;
     nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );

-    int bufSize = static_cast<int>(std::max(256 * 240 * sizeof(int), intBufSize + 256 * sizeof(int)));
-
-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
+    ensureSizeIsEnough(1, intBufSize + 256 * sizeof(int), CV_8UC1, buf);

-    GpuMat histBuf(1, 256 * 240, CV_32SC1, buf.ptr());
     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.ptr());
     GpuMat lut(1, 256, CV_32S, buf.ptr() + intBufSize);

-    calcHist(src, hist, histBuf, s);
+    calcHist(src, hist, s);

     cudaStream_t stream = StreamAccessor::getStream(s);

@@ -1061,10 +1032,7 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&
     nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );

-    if (stream == 0)
-        cudaSafeCall( cudaDeviceSynchronize() );
-
-    equalizeHist_gpu(src, dst, lut.ptr<int>(), stream);
+    hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
 }

 ////////////////////////////////////////////////////////////////////////
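After this change, equalizeHist composes three steps: calcHist produces the 256-bin histogram, nppsIntegral_32s turns it into a cumulative sum that serves as the lookup table, and the EqualizeHist functor maps each pixel to __float2int_rn(scale * lut[val]) with scale = 255 / (cols * rows). A host-side sketch of that mapping, assuming an inclusive cumulative sum (the inclusive/exclusive convention of the NPP call is not visible in this diff) and a hypothetical helper name:

// Host-side illustration of the equalization LUT; buildEqualizationLut is hypothetical.
#include <cstdio>

void buildEqualizationLut(const int hist[256], int totalPixels, unsigned char lut_out[256])
{
    int cdf = 0;
    for (int v = 0; v < 256; ++v)
    {
        cdf += hist[v];                                                 // inclusive cumulative sum
        lut_out[v] = (unsigned char)(255.0f * cdf / totalPixels + 0.5f); // round to nearest, like __float2int_rn on the device
    }
}

int main()
{
    int hist[256];
    for (int v = 0; v < 256; ++v)
        hist[v] = 4;                         // flat histogram: 1024 pixels in total

    unsigned char lut[256];
    buildEqualizationLut(hist, 256 * 4, lut);
    std::printf("lut[0]=%d lut[128]=%d lut[255]=%d\n", lut[0], lut[128], lut[255]);
    return 0;
}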