Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
5262c860
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5262c860
编写于
11月 30, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor opencl conv kernel and op.
上级
b028e4de
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
89 addition
and
131 deletion
+89
-131
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+2
-2
mace/kernels/opencl/cl/conv_2d_1x1.cl
mace/kernels/opencl/cl/conv_2d_1x1.cl
+27
-44
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+18
-51
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+6
-6
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+7
-4
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+10
-7
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+19
-17
未找到文件。
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
5262c860
...
...
@@ -24,8 +24,8 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
}
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
image
->
dtype
()
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
image
->
dtype
()
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Get
();
string
kernel_name
;
switch
(
type
)
{
...
...
mace/kernels/opencl/cl/conv_2d_1x1.cl
浏览文件 @
5262c860
...
...
@@ -23,15 +23,15 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
#
ifdef
BIAS
float4
out0
=
convert_float4
(
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
)
))
;
float
4
out1
=
out0
;
float
4
out2
=
out0
;
float
4
out3
=
out0
;
DATA_TYPE4
out0
=
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
))
;
DATA_TYPE
4
out1
=
out0
;
DATA_TYPE
4
out2
=
out0
;
DATA_TYPE
4
out3
=
out0
;
#
else
float
4
out0
=
0
;
float
4
out1
=
0
;
float
4
out2
=
0
;
float
4
out3
=
0
;
DATA_TYPE
4
out0
=
0
;
DATA_TYPE
4
out1
=
0
;
DATA_TYPE
4
out2
=
0
;
DATA_TYPE
4
out3
=
0
;
#
endif
int4
w
;
...
...
@@ -62,16 +62,16 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
int
in_x_base
=
0
;
for
(
int
in_ch_blk
=
0
; in_ch_blk < in_ch_blks; ++in_ch_blk) {
float4
in0
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.x,
out_hb_idx
)
))
;
float4
in1
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.y,
out_hb_idx
)
))
;
float4
in2
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.z,
out_hb_idx
)
))
;
float4
in3
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.w,
out_hb_idx
)
))
;
DATA_TYPE4
in0
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.x,
out_hb_idx
))
;
DATA_TYPE4
in1
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.y,
out_hb_idx
))
;
DATA_TYPE4
in2
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.z,
out_hb_idx
))
;
DATA_TYPE4
in3
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.w,
out_hb_idx
))
;
const
int
filter_x0
=
in_ch_blk
<<
2
;
float4
weights0
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0,
out_ch_blk
)
))
;
float4
weights1
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
1
,
out_ch_blk
)
))
;
float4
weights2
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
2
,
out_ch_blk
)
))
;
float4
weights3
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
3
,
out_ch_blk
)
))
;
DATA_TYPE4
weights0
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0,
out_ch_blk
))
;
DATA_TYPE4
weights1
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
1
,
out_ch_blk
))
;
DATA_TYPE4
weights2
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
2
,
out_ch_blk
))
;
DATA_TYPE4
weights3
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0
+
3
,
out_ch_blk
))
;
//
Will
prefetch
L2
improve
performance?
How
to
pretch
image
data?
out0
+=
in0.x
*
weights0
;
...
...
@@ -99,18 +99,18 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
#
ifdef
FUSED_BATCH_NORM
//
batch
norm
float
4
bn_scale_value
=
convert_float4
(
READ_IMAGET
(
bn_scale,
sampler,
(
int2
)(
out_ch_blk,
0
)
))
;
float4
scale0
=
(
float
4
)(
bn_scale_value.x
)
;
float4
scale1
=
(
float
4
)(
bn_scale_value.y
)
;
float4
scale2
=
(
float
4
)(
bn_scale_value.z
)
;
float4
scale3
=
(
float
4
)(
bn_scale_value.w
)
;
float
4
bn_offset_value
=
DATA_TYPE
4
bn_scale_value
=
READ_IMAGET
(
bn_scale,
sampler,
(
int2
)(
out_ch_blk,
0
))
;
DATA_TYPE4
scale0
=
(
DATA_TYPE
4
)(
bn_scale_value.x
)
;
DATA_TYPE4
scale1
=
(
DATA_TYPE
4
)(
bn_scale_value.y
)
;
DATA_TYPE4
scale2
=
(
DATA_TYPE
4
)(
bn_scale_value.z
)
;
DATA_TYPE4
scale3
=
(
DATA_TYPE
4
)(
bn_scale_value.w
)
;
DATA_TYPE
4
bn_offset_value
=
READ_IMAGET
(
bn_offset,
sampler,
(
int2
)(
out_ch_blk,
0
))
;
float4
offset0
=
(
float
4
)(
bn_offset_value.x
)
;
float4
offset1
=
(
float
4
)(
bn_offset_value.y
)
;
float4
offset2
=
(
float
4
)(
bn_offset_value.z
)
;
float4
offset3
=
(
float
4
)(
bn_offset_value.w
)
;
DATA_TYPE4
offset0
=
(
DATA_TYPE
4
)(
bn_offset_value.x
)
;
DATA_TYPE4
offset1
=
(
DATA_TYPE
4
)(
bn_offset_value.y
)
;
DATA_TYPE4
offset2
=
(
DATA_TYPE
4
)(
bn_offset_value.z
)
;
DATA_TYPE4
offset3
=
(
DATA_TYPE
4
)(
bn_offset_value.w
)
;
out0
=
out0
*
scale0
+
offset0
;
out1
=
out1
*
scale1
+
offset1
;
...
...
@@ -126,7 +126,6 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
out3
=
fmax
(
out3,
0
)
;
#
endif
#
ifdef
TYPE_FLOAT
const
int
out_x_base
=
out_ch_blk
*
width
;
int
out_x_idx
=
out_w_blk
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out0
)
;
...
...
@@ -142,21 +141,5 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out3
)
;
#
else
const
int
out_x_base
=
out_ch_blk
*
width
;
int
out_x_idx
=
out_w_blk
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
convert_half4
(
out0
))
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
convert_half4
(
out1
))
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
convert_half4
(
out2
))
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
convert_half4
(
out3
))
;
#
endif
}
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
5262c860
...
...
@@ -19,21 +19,20 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const
int
out_hb
=
get_global_id
(
2
)
;
const
int
rounded_in_ch
=
in_ch_blks
*
4
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
#
ifdef
BIAS
float
4
out0
=
convert_float4
(
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
)
))
;
float
4
out1
=
out0
;
float
4
out2
=
out0
;
float
4
out3
=
out0
;
float
4
out4
=
out0
;
DATA_TYPE
4
out0
=
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
))
;
DATA_TYPE
4
out1
=
out0
;
DATA_TYPE
4
out2
=
out0
;
DATA_TYPE
4
out3
=
out0
;
DATA_TYPE
4
out4
=
out0
;
#
else
float
4
out0
=
0
;
float
4
out1
=
0
;
float
4
out2
=
0
;
float
4
out3
=
0
;
float
4
out4
=
0
;
DATA_TYPE
4
out0
=
0
;
DATA_TYPE
4
out1
=
0
;
DATA_TYPE
4
out2
=
0
;
DATA_TYPE
4
out3
=
0
;
DATA_TYPE
4
out4
=
0
;
#
endif
#
if
STRIDE
==
1
...
...
@@ -54,8 +53,8 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const
int
batch_idx
=
(
out_hb
/
out_height
)
*
in_height
;
float
4
in0,
in1,
in2,
in3,
in4
;
float
4
weights0,
weights1,
weights2,
weights3
;
DATA_TYPE
4
in0,
in1,
in2,
in3,
in4
;
DATA_TYPE
4
weights0,
weights1,
weights2,
weights3
;
int
in_idx,
hb_idx,
width_idx,
in_width_idx
;
//
Unrolling
this
loop
hurt
perfmance
for
(
short
in_ch_blk
=
0
; in_ch_blk < in_ch_blks; ++in_ch_blk) {
...
...
@@ -75,7 +74,7 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
in_width_value = select(in_idx + in_width_value, \
-1, \
(in_width_value < 0 |
|
in_width_value
>=
in_width
))
; \
in##i
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_width_value,
in_hb_value
)
))
;
in##i
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_width_value,
in_hb_value
))
;
READ_INPUT
(
0
)
;
READ_INPUT
(
1
)
;
...
...
@@ -86,10 +85,10 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
#
undef
READ_INPUT
int
filter_idx
=
(
in_ch_blk
<<
2
)
+
(
hb_idx
*
3
+
width_idx
)
*
rounded_in_ch
;
weights0
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
0
,
out_ch_blk
)
))
;
weights1
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
1
,
out_ch_blk
)
))
;
weights2
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
2
,
out_ch_blk
)
))
;
weights3
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
3
,
out_ch_blk
)
))
;
weights0
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
0
,
out_ch_blk
))
;
weights1
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
1
,
out_ch_blk
))
;
weights2
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
2
,
out_ch_blk
))
;
weights3
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
3
,
out_ch_blk
))
;
//
Will
prefetch
L2
improve
performance?
How
to
pretch
image
data?
...
...
@@ -122,7 +121,6 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
}
}
#
ifdef
TYPE_FLOAT
const
int
out_x_base
=
out_ch_blk
*
out_width
;
int
w
=
out_w_blk
;
WRITE_IMAGET
(
output,
...
...
@@ -152,36 +150,5 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
out4
)
;
#
else
const
int
out_x_base
=
out_ch_blk
*
out_width
;
int
w
=
out_w_blk
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out0
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out1
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out2
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out3
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out4
))
;
#
endif
}
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
5262c860
...
...
@@ -10,19 +10,19 @@ namespace kernels {
extern
void
Conv2dOpenclK1x1S1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
const
DataType
dt
,
Tensor
*
output
);
extern
void
Conv2dOpenclK1x1S2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
const
DataType
dt
,
Tensor
*
output
);
extern
void
Conv2dOpenclK3x3S1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
const
DataType
dt
,
Tensor
*
output
);
extern
void
Conv2dOpenclK3x3S2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
const
DataType
dt
,
Tensor
*
output
);
template
<
typename
T
>
void
Conv2dFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
...
...
@@ -31,7 +31,7 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
Tensor
*
output
)
{
typedef
void
(
*
Conv2dOpenclFunction
)(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
DataType
dt
,
Tensor
*
output
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dOpenclFunction
selector
[
5
][
2
]
=
{
{
Conv2dOpenclK1x1S1
,
Conv2dOpenclK1x1S2
},
...
...
@@ -70,7 +70,7 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
auto
conv2d_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_func
(
input
,
filter
,
bias
,
paddings
.
data
(),
output
);
conv2d_func
(
input
,
filter
,
bias
,
paddings
.
data
(),
DataTypeToEnum
<
T
>::
value
,
output
);
}
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
5262c860
...
...
@@ -15,6 +15,7 @@ void Conv1x1(const Tensor *input,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
DataType
dt
,
Tensor
*
output
)
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
...
...
@@ -32,8 +33,8 @@ void Conv1x1(const Tensor *input,
MACE_CHECK
(
input_batch
==
batch
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
input
->
dtype
()
==
DT_FLOAT
?
"-DTYPE_FLOAT"
:
""
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
input
->
dtype
()
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
dt
)
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
dt
));
built_options
.
emplace
(
"-DSTRIDE="
+
ToString
(
stride
));
if
(
bias
!=
nullptr
)
{
built_options
.
emplace
(
"-DBIAS"
);
...
...
@@ -74,16 +75,18 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
const
DataType
dt
,
Tensor
*
output
)
{
Conv1x1
(
input
,
filter
,
bias
,
1
,
output
);
Conv1x1
(
input
,
filter
,
bias
,
1
,
dt
,
output
);
};
extern
void
Conv2dOpenclK1x1S2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
const
DataType
dt
,
Tensor
*
output
)
{
Conv1x1
(
input
,
filter
,
bias
,
2
,
output
);
Conv1x1
(
input
,
filter
,
bias
,
2
,
dt
,
output
);
};
}
// namespace kernels
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
5262c860
...
...
@@ -13,7 +13,8 @@ namespace kernels {
static
void
Conv2d3x3S12
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
uint32_t
stride
,
const
int
*
padding
,
Tensor
*
output
)
{
const
int
*
padding
,
const
DataType
dt
,
Tensor
*
output
)
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
...
...
@@ -25,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const
index_t
width_blocks
=
RoundUpDiv
<
index_t
,
5
>
(
width
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
input
->
dtype
()
==
DT_FLOAT
?
"-DTYPE_FLOAT"
:
""
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
input
->
dtype
()
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
dt
)
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
dt
));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
built_options
.
emplace
(
"-DSTRIDE="
+
ToString
(
stride
));
...
...
@@ -63,13 +64,15 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
}
void
Conv2dOpenclK3x3S1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
)
{
Conv2d3x3S12
(
input
,
filter
,
bias
,
1
,
padding
,
output
);
const
Tensor
*
bias
,
const
int
*
padding
,
const
DataType
dt
,
Tensor
*
output
)
{
Conv2d3x3S12
(
input
,
filter
,
bias
,
1
,
padding
,
dt
,
output
);
};
void
Conv2dOpenclK3x3S2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
)
{
Conv2d3x3S12
(
input
,
filter
,
bias
,
2
,
padding
,
output
);
const
Tensor
*
bias
,
const
int
*
padding
,
const
DataType
dt
,
Tensor
*
output
)
{
Conv2d3x3S12
(
input
,
filter
,
bias
,
2
,
padding
,
dt
,
output
);
};
}
// namespace kernels
...
...
mace/ops/conv_2d_test.cc
浏览文件 @
5262c860
...
...
@@ -558,14 +558,14 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
64
,
128
});
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
32
,
64
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
});
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
>
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
...
@@ -612,15 +612,15 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
std
::
vector
<
half
>
input_data
(
float_input_data
.
begin
(),
float_input_data
.
end
());
std
::
vector
<
half
>
filter_data
(
float_filter_data
.
begin
(),
float_filter_data
.
end
());
std
::
vector
<
half
>
bias_data
(
float_bias_data
.
begin
(),
float_bias_data
.
end
());
net
.
AddInputFromArray
<
D
,
T
>
(
"InputHalf"
,
{
batch
,
height
,
width
,
input_channels
},
input_data
);
net
.
AddInputFromArray
<
D
,
T
>
(
net
.
AddInputFromArray
<
D
,
half
>
(
"InputHalf"
,
{
batch
,
height
,
width
,
input_channels
},
input_data
);
net
.
AddInputFromArray
<
D
,
half
>
(
"FilterHalf"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
filter_data
);
net
.
AddInputFromArray
<
D
,
T
>
(
"BiasHalf"
,
{
output_channels
},
bias_data
);
net
.
AddInputFromArray
<
D
,
half
>
(
"BiasHalf"
,
{
output_channels
},
bias_data
);
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"InputHalf"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"FilterHalf"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"BiasHalf"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"InputHalf"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"FilterHalf"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"BiasHalf"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -630,24 +630,26 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
half
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
,
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1.0
);
ExpectTensorNear
<
float
,
half
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.2
);
};
for
(
int
kernel_size
:
{
3
})
{
for
(
int
stride
:
{
1
})
{
for
(
int
kernel_size
:
{
1
,
3
})
{
for
(
int
stride
:
{
1
,
2
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
);
}
}
}
// TODO: support half input & float computation
//TEST_F(Conv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
// TestHalfComplexConvNxNS12<DeviceType::OPENCL, half>({32, 32, 64, 128});
//}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConvNxNS12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfUnalignedConvNxNS12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
,
5
,
7
});
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录