Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
3f927abb
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3f927abb
编写于
11月 18, 2013
作者:
R
Roman Donchenko
提交者:
OpenCV Buildbot
11月 18, 2013
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1809 from ilya-lavrenov:ocl_resize_nn
上级
f5ded410
cc237b7a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
81 addition
and
219 deletion
+81
-219
modules/ocl/src/imgproc.cpp
modules/ocl/src/imgproc.cpp
+38
-77
modules/ocl/src/opencl/imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+43
-142
未找到文件。
modules/ocl/src/imgproc.cpp
浏览文件 @
3f927abb
...
...
@@ -282,96 +282,63 @@ namespace cv
static
void
resize_gpu
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
fx
,
double
fy
,
int
interpolation
)
{
CV_Assert
(
(
src
.
channels
()
==
dst
.
channels
())
);
Context
*
clCxt
=
src
.
clCxt
;
float
ifx
=
1.
/
fx
;
float
ify
=
1.
/
fy
;
double
ifx_d
=
1.
/
fx
;
double
ify_d
=
1.
/
fy
;
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
oclchannels
();
int
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
oclchannels
();
int
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
string
kernelName
;
if
(
interpolation
==
INTER_LINEAR
)
kernelName
=
"resizeLN"
;
else
if
(
interpolation
==
INTER_NEAREST
)
kernelName
=
"resizeNN"
;
float
ifx
=
1.
f
/
fx
,
ify
=
1.
f
/
fy
;
int
src_step
=
src
.
step
/
src
.
elemSize
(),
src_offset
=
src
.
offset
/
src
.
elemSize
();
int
dst_step
=
dst
.
step
/
dst
.
elemSize
(),
dst_offset
=
dst
.
offset
/
dst
.
elemSize
();
int
ocn
=
interpolation
==
INTER_LINEAR
?
dst
.
oclchannels
()
:
-
1
;
int
depth
=
interpolation
==
INTER_LINEAR
?
dst
.
depth
()
:
-
1
;
const
char
*
const
interMap
[]
=
{
"NN"
,
"LN"
,
"CUBIC"
,
"AREA"
,
"LAN4"
};
std
::
string
kernelName
=
std
::
string
(
"resize"
)
+
interMap
[
interpolation
];
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"uchar"
,
"ushort"
,
"ushort"
,
"int"
,
"int"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
};
std
::
string
buildOption
=
format
(
"-D %s -D T=%s%s"
,
interMap
[
interpolation
],
typeMap
[
dst
.
depth
()],
channelMap
[
dst
.
oclchannels
()]);
//TODO: improve this kernel
size_t
blkSizeX
=
16
,
blkSizeY
=
16
;
size_t
glbSizeX
;
if
(
src
.
type
()
==
CV_8UC1
)
if
(
src
.
type
()
==
CV_8UC1
&&
interpolation
==
INTER_LINEAR
)
{
size_t
cols
=
(
dst
.
cols
+
dst
.
offset
%
4
+
3
)
/
4
;
glbSizeX
=
cols
%
blkSizeX
==
0
&&
cols
!=
0
?
cols
:
(
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
}
else
glbSizeX
=
dst
.
cols
%
blkSizeX
==
0
&&
dst
.
cols
!=
0
?
dst
.
cols
:
(
dst
.
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
glbSizeX
=
dst
.
cols
;
size_t
glbSizeY
=
dst
.
rows
%
blkSizeY
==
0
&&
dst
.
rows
!=
0
?
dst
.
rows
:
(
dst
.
rows
/
blkSizeY
+
1
)
*
blkSizeY
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
dst
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
interpolation
==
INTER_NEAREST
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
if
(
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
openCLExecuteKernel
(
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
oclchannels
(),
src
.
depth
());
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
ocn
,
depth
,
buildOption
.
c_str
());
}
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
)
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
||
src
.
type
()
==
CV_8UC4
||
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
||
src
.
type
()
==
CV_32FC4
);
CV_Assert
(
interpolation
==
INTER_LINEAR
||
interpolation
==
INTER_NEAREST
);
CV_Assert
(
src
.
size
().
area
()
>
0
);
CV_Assert
(
!
(
dsize
==
Size
())
||
(
fx
>
0
&&
fy
>
0
)
);
if
(
!
(
dsize
==
Size
())
&&
(
fx
>
0
&&
fy
>
0
))
if
(
dsize
.
width
!=
(
int
)(
src
.
cols
*
fx
)
||
dsize
.
height
!=
(
int
)(
src
.
rows
*
fy
))
CV_Error
(
CV_StsUnmatchedSizes
,
"invalid dsize and fx, fy!"
);
CV_Assert
(
dsize
.
area
()
>
0
||
(
fx
>
0
&&
fy
>
0
));
if
(
dsize
==
Size
()
)
if
(
dsize
.
area
()
==
0
)
{
dsize
=
Size
(
saturate_cast
<
int
>
(
src
.
cols
*
fx
),
saturate_cast
<
int
>
(
src
.
rows
*
fy
));
CV_Assert
(
dsize
.
area
()
>
0
);
}
else
{
fx
=
(
double
)
dsize
.
width
/
src
.
cols
;
...
...
@@ -380,13 +347,7 @@ namespace cv
dst
.
create
(
dsize
,
src
.
type
());
if
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
)
{
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
return
;
}
CV_Error
(
CV_StsUnsupportedFormat
,
"Non-supported interpolation method"
);
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
}
////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/imgproc_resize.cl
浏览文件 @
3f927abb
...
...
@@ -45,7 +45,7 @@
//
resize
kernel
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4
are
supported.
//
Currently,
CV_8UC1
,
CV_8UC4,
CV_32FC1
and
CV_32FC4
are
supported.
//
We
shall
support
other
types
later
if
necessary.
#
ifdef
DOUBLE_SUPPORT
...
...
@@ -54,20 +54,18 @@
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
F
double
#
else
#
define
F
float
#
endif
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
#
define
CAST_BITS
(
INTER_RESIZE_COEF_BITS
<<
1
)
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
#
ifdef
LN
__kernel
void
resizeLN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
const
*
restrict
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
gx
=
get_global_id
(
0
)
;
...
...
@@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
float4
sx,
u,
xf
;
int4
x,
DX
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
xf
=
floor
(
sx
)
;
...
...
@@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
int4
val1,
val2,
val
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s1
=
src[pos1.s1]
;
...
...
@@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
pos4
=
mad24
(
dy,
dst
step_in_pixel,
gx+dstoffset_in_pixel
)
;
pos4
=
mad24
(
dy,
dst
_step,
gx+dst_offset
)
;
pos4.y++
;
pos4.z+=2
;
pos4.w+=3
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
offset_in_pixel
&3
)
==0
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
_offset
&3
)
==0
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
...
...
@@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
}
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
val
=
mul24
((
int4
)
mul24
(
U1,
V1
)
,
data0
)
+
mul24
((
int4
)
mul24
(
U,
V1
)
,
data1
)
+mul24
((
int4
)
mul24
(
U1,
V
)
,
data2
)
+mul24
((
int4
)
mul24
(
U,
V
)
,
data3
)
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
uval
;
}
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float
data0
=
src[srcpos.x]
;
float
data1
=
src[srcpos.y]
;
float
data2
=
src[srcpos.z]
;
...
...
@@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
val2
=
u1
*
data2
+
u
*
data3
;
float
val
=
v1
*
val1
+
v
*
val2
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
}
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
s_data1
=
src[srcpos.x]
;
s_data2
=
src[srcpos.y]
;
...
...
@@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
s_data4
=
src[srcpos.w]
;
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
}
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
gx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
#
elif
defined
NN
gx
=
(
gx<<2
)
-
(
dstoffset_in_pixel&3
)
;
//int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
int4
sx
;
int
sy
;
F
ss1
=
gx*ifx
;
F
ss2
=
(
gx+1
)
*ifx
;
F
ss3
=
(
gx+2
)
*ifx
;
F
ss4
=
(
gx+3
)
*ifx
;
F
s5
=
dy
*
ify
;
sx.s0
=
min
((
int
)
floor
(
ss1
)
,
src_cols-1
)
;
sx.s1
=
min
((
int
)
floor
(
ss2
)
,
src_cols-1
)
;
sx.s2
=
min
((
int
)
floor
(
ss3
)
,
src_cols-1
)
;
sx.s3
=
min
((
int
)
floor
(
ss4
)
,
src_cols-1
)
;
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
uchar4
val
;
int4
pos
=
mad24
((
int4
)
sy,
(
int4
)
srcstep_in_pixel,
sx+
(
int4
)
srcoffset_in_pixel
)
;
val.s0
=
src[pos.s0]
;
val.s1
=
src[pos.s1]
;
val.s2
=
src[pos.s2]
;
val.s3
=
src[pos.s3]
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
//uchar4
dVal
=
*d
;
pos
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos.y++
;
pos.z+=2
;
pos.w+=3
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dstoffset_in_pixel&3
)
==0
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos.x
)
=val
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.x]=val.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.y]=val.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.z]=val.z
;
}
if
(
gx+3
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.w]=val.w
;
}
}
}
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
__kernel
void
resizeNN
(
__global
T
*
dst,
__global
T
*
src,
int
dst_offset,
int
src_offset,int
dst_step,
int
src_step,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
if
(
dx
<
dst_cols
&&
dy
<
dst_rows
)
{
float
s1
=
dx
*
ifx,
s2
=
dy
*
ify
;
int
sx
=
min
(
convert_int_sat_rtn
(
s1
)
,
src_cols
-
1
)
;
int
sy
=
min
(
convert_int_sat_rtn
(
s2
)
,
src_rows
-
1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
int
dst_index
=
mad24
(
dy,
dst_step,
dx
+
dst_offset
)
;
int
src_index
=
mad24
(
sy,
src_step,
sx
+
src_offset
)
;
dst[dst_index]
=
src[src_index]
;
}
}
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
s_col
=
floor
(
s1
)
;
int
s_row
=
floor
(
s2
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
#
endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录