Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
29c3f0f7
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
29c3f0f7
编写于
11月 30, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor conv register and optimize conv 3x3.
上级
8499b852
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
350 addition
and
219 deletion
+350
-219
mace/core/half.h
mace/core/half.h
+1
-1
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+29
-13
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+86
-63
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+7
-4
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+3
-1
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+10
-0
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+5
-38
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+168
-80
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+41
-19
未找到文件。
mace/core/half.h
浏览文件 @
29c3f0f7
...
...
@@ -1098,7 +1098,7 @@ namespace half_float
/// Conversion constructor.
/// \param rhs float to convert
explicit
half
(
float
rhs
)
:
data_
(
detail
::
float2half
<
round_style
>
(
rhs
))
{}
half
(
float
rhs
)
:
data_
(
detail
::
float2half
<
round_style
>
(
rhs
))
{}
/// Conversion to single-precision.
/// \return single precision value representing expression value
...
...
mace/kernels/conv_2d.h
浏览文件 @
29c3f0f7
...
...
@@ -11,13 +11,23 @@
namespace
mace
{
namespace
kernels
{
struct
Conv2dFunctorBase
{
Conv2dFunctorBase
(
const
int
*
strides
,
const
Padding
&
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
dilations_
(
dilations
),
paddings_
(
paddings
)
{}
const
int
*
strides_
;
// [stride_h, stride_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
Padding
paddings_
;
};
template
<
DeviceType
D
,
typename
T
>
struct
Conv2dFunctor
{
Conv2dFunctor
()
{}
struct
Conv2dFunctor
:
Conv2dFunctorBase
{
Conv2dFunctor
(
const
int
*
strides
,
const
Padding
&
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
dilations_
(
dilations
),
paddings_
(
padding
s
)
{}
:
Conv2dFunctorBase
(
strides
,
paddings
,
dilation
s
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
...
...
@@ -76,9 +86,10 @@ struct Conv2dFunctor {
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
T
bias_channel
=
bias_data
?
bias_data
[
c
]
:
0
;
T
bias_channel
=
0.0
f
;
if
(
bias
)
bias_channel
=
bias_data
[
c
];
*
output_data
=
bias_channel
;
T
sum
=
0
;
T
sum
=
0
.0
f
;
const
T
*
filter_ptr
=
filter_data
+
c
;
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
...
...
@@ -113,9 +124,6 @@ struct Conv2dFunctor {
}
const
int
*
strides_
;
// [stride_h, stride_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
Padding
paddings_
;
};
template
<
>
...
...
@@ -123,11 +131,19 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
);
template
<
>
void
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
template
<
typename
T
>
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
T
>
:
Conv2dFunctorBase
{
Conv2dFunctor
(
const
int
*
strides
,
const
Padding
&
paddings
,
const
int
*
dilations
)
:
Conv2dFunctorBase
(
strides
,
paddings
,
dilations
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
);
};
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
29c3f0f7
...
...
@@ -19,86 +19,76 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const
int
out_hb
=
get_global_id
(
2
)
;
const
int
rounded_in_ch
=
in_ch_blks
*
4
;
DATA_TYPE
4
out0
=
0
;
DATA_TYPE
4
out1
=
0
;
DATA_TYPE
4
out2
=
0
;
DATA_TYPE
4
out3
=
0
;
DATA_TYPE
4
out4
=
0
;
float
4
out0
=
0
;
float
4
out1
=
0
;
float
4
out2
=
0
;
float
4
out3
=
0
;
float
4
out4
=
0
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
#
ifdef
BIAS
out0
=
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
))
;
convert_float4
(
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
)
))
;
out1
=
out0
;
out2
=
out0
;
out3
=
out0
;
out4
=
out0
;
#
endif
#
define
DEFINE_IN_WIDTH
(
i
)
\
in_width##i[1]
=
in_width##i[0]
+
1
; \
in_width##i[2]
=
in_width##i[0]
+
2
; \
in_width##i[0]
=
(
in_width##i[0]
<
0
|
| in_width##i[0] >= in_width) ? (INT_MIN) : in_width##i[0]; \
in_width##i[1] = (in_width##i[1] < 0 || in_width##i[1] >= in_width) ? (INT_MIN) : in_width##i[1]; \
in_width##i[2] = (in_width##i[2] < 0 || in_width##i[2] >= in_width) ? (INT_MIN) : in_width##i[2];
int in_width0[3];
int in_width1[3];
int in_width2[3];
int in_width3[3];
int in_width4[3];
in_width0[0] = out_w_blk - padding_left;
in_width1[0] = in_width0[0] + out_w_blks;
in_width2[0] = in_width1[0] + out_w_blks;
in_width3[0] = in_width2[0] + out_w_blks;
in_width4[0] = in_width3[0] + out_w_blks;
DEFINE_IN_WIDTH(0);
DEFINE_IN_WIDTH(1);
DEFINE_IN_WIDTH(2);
DEFINE_IN_WIDTH(3);
DEFINE_IN_WIDTH(4);
#undef DEFINE_IN_WIDTH
const int batch_idx = out_hb / out_height;
const int height_idx = out_hb % out_height;
int in_hb[3];
in_hb[0] = height_idx - padding_top;
in_hb[1] = in_hb[0] + 1;
in_hb[2] = in_hb[1] + 1;
// Judge the height border for padding input.
in_hb[0] = (in_hb[0] < 0 || in_hb[0] >= in_height) ? -1 : in_hb[0] + batch_idx * in_height;
in_hb[1] = (in_hb[1] < 0 || in_hb[1] >= in_height) ? -1 : in_hb[1] + batch_idx * in_height;
in_hb[2] = (in_hb[2] < 0 |
|
in_hb[2]
>=
in_height
)
?
-1
:
in_hb[2]
+
batch_idx
*
in_height
;
const
int
input_image_width
=
in_ch_blks
*
in_width
;
DATA_TYPE4
in0,
in1,
in2,
in3,
in4
;
DATA_TYPE4
weights0,
weights1,
weights2,
weights3
;
#
ifdef
STRIDE_1
int
in_width0
=
out_w_blk
-
padding_left
;
int
in_width1
=
in_width0
+
out_w_blks
;
int
in_width2
=
in_width1
+
out_w_blks
;
int
in_width3
=
in_width2
+
out_w_blks
;
int
in_width4
=
in_width3
+
out_w_blks
;
const
int
height_idx
=
(
out_hb
%
out_height
)
-
padding_top
;
#
else
int
in_width0
=
out_w_blk
*
2
-
padding_left
;
int
in_width1
=
(
out_w_blk
+
out_w_blks
)
*
2
-
padding_left
;
int
in_width2
=
(
out_w_blk
+
2
*
out_w_blks
)
*
2
-
padding_left
;
int
in_width3
=
(
out_w_blk
+
3
*
out_w_blks
)
*
2
-
padding_left
;
int
in_width4
=
(
out_w_blk
+
4
*
out_w_blks
)
*
2
-
padding_left
;
const
int
height_idx
=
(
out_hb
%
out_height
)
*
2
-
padding_top
;
#
endif
const
int
batch_idx
=
(
out_hb
/
out_height
)
*
in_height
;
float4
in0,
in1,
in2,
in3,
in4
;
float4
weights0,
weights1,
weights2,
weights3
;
int
in_idx,
hb_idx,
width_idx,
in_width_idx
;
//
Unrolling
this
loop
hurt
perfmance
for
(
short
in_ch_blk
=
0
; in_ch_blk < in_ch_blks; ++in_ch_blk) {
for
(
short
hb_idx
=
0
; hb_idx < 3; ++
hb_idx) {
for
(
short
hb_idx
=
0
; hb_idx < 3; ++hb_idx) {
for
(
short
width_idx
=
0
; width_idx < 3; ++width_idx) {
in_idx
=
in_ch_blk
*
in_width
;
//
Judge
the
width
border
for
padding
input.
in0
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_idx
+
in_width0[width_idx],
in_hb[hb_idx]
))
;
in1
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_idx
+
in_width1[width_idx],
in_hb[hb_idx]
))
;
in2
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_idx
+
in_width2[width_idx],
in_hb[hb_idx]
))
;
in3
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_idx
+
in_width3[width_idx],
in_hb[hb_idx]
))
;
in4
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_idx
+
in_width4[width_idx],
in_hb[hb_idx]
))
;
int
in_hb_value
=
height_idx
+
hb_idx
;
in_hb_value
=
select
(
in_hb_value
+
batch_idx,
-1
,
(
in_hb_value
<
0
|
| in_hb_value >= in_height));
int in_width_value;
#define READ_INPUT(i) \
in_width_value = in_width##i + width_idx; \
in_width_value = select(in_idx + in_width_value, \
-1, \
(in_width_value < 0 |
|
in_width_value
>=
in_width
))
; \
in##i
=
convert_float4
(
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_width_value,
in_hb_value
)))
;
READ_INPUT
(
0
)
;
READ_INPUT
(
1
)
;
READ_INPUT
(
2
)
;
READ_INPUT
(
3
)
;
READ_INPUT
(
4
)
;
#
undef
READ_INPUT
int
filter_idx
=
(
in_ch_blk
<<
2
)
+
(
hb_idx
*
3
+
width_idx
)
*
rounded_in_ch
;
weights0
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
0
,
out_ch_blk
))
;
weights1
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
1
,
out_ch_blk
))
;
weights2
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
2
,
out_ch_blk
))
;
weights3
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
3
,
out_ch_blk
))
;
weights0
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
0
,
out_ch_blk
)
))
;
weights1
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
1
,
out_ch_blk
)
))
;
weights2
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
2
,
out_ch_blk
)
))
;
weights3
=
convert_float4
(
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_idx
+
3
,
out_ch_blk
)
))
;
//
Will
prefetch
L2
improve
performance?
How
to
pretch
image
data?
...
...
@@ -131,6 +121,7 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
}
}
#
ifdef
TYPE_FLOAT
const
int
out_x_base
=
out_ch_blk
*
out_width
;
int
w
=
out_w_blk
;
WRITE_IMAGET
(
output,
...
...
@@ -160,4 +151,36 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
out4
)
;
#
else
const
int
out_x_base
=
out_ch_blk
*
out_width
;
int
w
=
out_w_blk
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out0
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out1
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out2
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out3
))
;
w
+=
out_w_blks
;
if
(
w
>=
out_width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w,
out_hb
)
,
convert_half4
(
out4
))
;
#
endif
}
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
29c3f0f7
...
...
@@ -24,8 +24,8 @@ extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
);
template
<
>
void
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
template
<
typename
T
>
void
Conv2dFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
)
{
...
...
@@ -36,7 +36,7 @@ void Conv2dFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
static
const
Conv2dOpenclFunction
selector
[
5
][
2
]
=
{
{
Conv2dOpenclK1x1S1
,
Conv2dOpenclK1x1S2
},
{
nullptr
,
nullptr
},
{
Conv2dOpenclK3x3S1
,
nullptr
},
{
Conv2dOpenclK3x3S1
,
Conv2dOpenclK3x3S2
},
{
nullptr
,
nullptr
},
{
nullptr
,
nullptr
}};
...
...
@@ -50,7 +50,7 @@ void Conv2dFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
<<
" stride "
<<
strides_
[
0
]
<<
"x"
<<
strides_
[
1
]
<<
" is not implemented yet, using slow version"
;
// TODO(heliangliang) The CPU/NEON kernel should map the buffer
Conv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
Conv2dFunctor
<
DeviceType
::
CPU
,
T
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
filter
,
bias
,
output
);
return
;
}
...
...
@@ -73,5 +73,8 @@ void Conv2dFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
conv2d_func
(
input
,
filter
,
bias
,
paddings
.
data
(),
output
);
}
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
29c3f0f7
...
...
@@ -25,9 +25,10 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const
index_t
width_blocks
=
RoundUpDiv
<
index_t
,
5
>
(
width
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
input
->
dtype
())
);
built_options
.
emplace
(
input
->
dtype
()
==
DT_FLOAT
?
"-DTYPE_FLOAT"
:
""
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
input
->
dtype
()));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
built_options
.
emplace
(
stride
==
1
?
"-DSTRIDE_1"
:
""
);
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
...
...
@@ -68,6 +69,7 @@ void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter,
void
Conv2dOpenclK3x3S2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
)
{
Conv2d3x3S12
(
input
,
filter
,
bias
,
2
,
padding
,
output
);
};
}
// namespace kernels
...
...
mace/ops/conv_2d.cc
浏览文件 @
29c3f0f7
...
...
@@ -11,6 +11,11 @@ REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D")
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
half
>
);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
...
...
@@ -23,4 +28,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D")
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/conv_2d_benchmark.cc
浏览文件 @
29c3f0f7
...
...
@@ -33,9 +33,9 @@ static void Conv2d(int iters,
net
.
AddRandomInput
<
D
,
T
>
(
"Bias"
,
{
output_channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -44,6 +44,7 @@ static void Conv2d(int iters,
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -88,43 +89,9 @@ static void Conv2d(int iters,
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
// ICNet
BM_CONV_2D
(
1
,
512
,
15
,
15
,
1
,
1
,
1
,
VALID
,
1024
,
float
);
BM_CONV_2D
(
1
,
128
,
60
,
60
,
3
,
3
,
1
,
VALID
,
128
,
float
);
// SNPE GPU ExecutionDuration = 448us, % ALU Utilization = 105
BM_CONV_2D
(
1
,
64
,
60
,
60
,
1
,
1
,
1
,
VALID
,
128
,
float
);
// SNPE GPU ExecutionDuration = 258us, % ALU Utilization = 108
BM_CONV_2D
(
1
,
32
,
60
,
60
,
1
,
1
,
1
,
VALID
,
128
,
float
);
// SNPE GPU ExecutionDuration = 506us, % ALU Utilization = 106.8
BM_CONV_2D
(
1
,
32
,
60
,
60
,
3
,
3
,
1
,
SAME
,
32
,
float
);
// Test RGB <-> YUV
BM_CONV_2D
(
1
,
3
,
2160
,
1080
,
1
,
1
,
1
,
VALID
,
3
,
float
);
BM_CONV_2D
(
1
,
3
,
480
,
480
,
1
,
1
,
1
,
VALID
,
3
,
float
);
BM_CONV_2D
(
1
,
32
,
60
,
60
,
3
,
3
,
1
,
SAME
,
32
,
half
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
1
,
1
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
1
,
1
,
1
,
VALID
,
128
,
float
);
// Test bad alignments
BM_CONV_2D
(
1
,
3
,
512
,
512
,
1
,
1
,
1
,
VALID
,
3
,
float
);
BM_CONV_2D
(
1
,
32
,
112
,
112
,
1
,
1
,
1
,
VALID
,
64
,
float
);
BM_CONV_2D
(
1
,
64
,
56
,
56
,
1
,
1
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
256
,
28
,
28
,
1
,
1
,
1
,
VALID
,
256
,
float
);
BM_CONV_2D
(
1
,
1024
,
7
,
7
,
1
,
1
,
1
,
VALID
,
1024
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
3
,
3
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
3
,
512
,
512
,
3
,
3
,
1
,
VALID
,
3
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
3
,
3
,
1
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
2
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
3
,
512
,
512
,
3
,
3
,
2
,
VALID
,
3
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
3
,
3
,
2
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
2
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
3
,
3
,
2
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
5
,
5
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
31
,
5
,
5
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
5
,
5
,
1
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
31
,
5
,
5
,
1
,
SAME
,
128
,
float
);
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
29c3f0f7
...
...
@@ -98,9 +98,9 @@ void TestNHWCSimple3x3VALID() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -109,12 +109,13 @@ void TestNHWCSimple3x3VALID() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -125,13 +126,14 @@ void TestNHWCSimple3x3VALID() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
T
>
({
1
,
1
,
1
,
1
},
{
18.1
f
});
ExpectTensorNear
<
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.0
01
);
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
1
},
{
18.1
f
});
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.
01
);
}
template
<
DeviceType
D
,
typename
T
>
...
...
@@ -149,9 +151,9 @@ void TestNHWCSimple3x3SAME() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -160,12 +162,13 @@ void TestNHWCSimple3x3SAME() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -176,26 +179,31 @@ void TestNHWCSimple3x3SAME() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
T
>
(
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
3
,
3
,
1
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
});
ExpectTensorNear
<
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.0
01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.
01
);
}
TEST_F
(
Conv2dOpTest
,
CPUSimple
)
{
TestNHWCSimple3x3VALID
<
DeviceType
::
CPU
,
float
>
();
TestNHWCSimple3x3VALID
<
DeviceType
::
CPU
,
half
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
CPU
,
float
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
CPU
,
half
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLSimple
)
{
TestNHWCSimple3x3VALID
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3VALID
<
DeviceType
::
OPENCL
,
half
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
half
>
();
}
template
<
DeviceType
D
>
...
...
@@ -233,22 +241,22 @@ TEST_F(Conv2dOpTest, NEONWithouBias) {
TestSimple3x3WithoutBias
<
DeviceType
::
NEON
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3WithoutBias
()
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Input"
,
{
1
,
3
,
3
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Filter"
,
{
3
,
3
,
2
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -257,11 +265,12 @@ void TestNHWCSimple3x3WithoutBias() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
...
...
@@ -270,6 +279,7 @@ void TestNHWCSimple3x3WithoutBias() {
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
...
...
@@ -279,15 +289,17 @@ void TestNHWCSimple3x3WithoutBias() {
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
1
},
{
18.0
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.0
01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.
01
);
}
TEST_F
(
Conv2dOpTest
,
CPUWithoutBias
)
{
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
CPU
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
CPU
,
float
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
CPU
,
half
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLWithoutBias
)
{
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
half
>
();
}
template
<
DeviceType
D
>
...
...
@@ -333,27 +345,27 @@ TEST_F(Conv2dOpTest, NEONCombined) {
TestCombined3x3
<
DeviceType
::
NEON
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestNHWCCombined3x3
()
{
// Construct graph
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Input"
,
{
1
,
5
,
5
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Filter"
,
{
3
,
3
,
2
,
2
},
{
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -363,11 +375,12 @@ static void TestNHWCCombined3x3() {
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
...
...
@@ -377,6 +390,7 @@ static void TestNHWCCombined3x3() {
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
...
...
@@ -388,17 +402,33 @@ static void TestNHWCCombined3x3() {
{
1
,
3
,
3
,
2
},
{
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
18.1
f
,
9.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
TEST_F
(
Conv2dOpTest
,
CPUStride2
)
{
TestNHWCCombined3x3
<
DeviceType
::
CPU
,
float
>
();
TestNHWCCombined3x3
<
DeviceType
::
CPU
,
half
>
();
}
TEST_F
(
Conv2dOpTest
,
CPUCombined
)
{
TestNHWCCombined3x3
<
DeviceType
::
CPU
>
();
TEST_F
(
Conv2dOpTest
,
OPENCLStride2
)
{
TestNHWCCombined3x3
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCCombined3x3
<
DeviceType
::
OPENCL
,
half
>
();
}
template
<
DeviceType
D
>
void
TestConv1x1
()
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
...
...
@@ -415,39 +445,8 @@ void TestConv1x1() {
{
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Construct graph
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
(
...
...
@@ -466,11 +465,11 @@ TEST_F(Conv2dOpTest, CPUConv1x1) {
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
//
TEST_F(Conv2dOpTest, OPENCLConv1x1) {
//
TestConv1x1<DeviceType::OPENCL>();
//
}
template
<
DeviceType
D
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
...
@@ -478,7 +477,6 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
srand
(
time
(
NULL
));
// generate random input
// TODO test all sizes
index_t
batch
=
3
+
(
rand
()
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
...
...
@@ -494,13 +492,14 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Bias"
,
{
output_channels
});
// run on cpu
net
.
RunOp
();
...
...
@@ -509,9 +508,9 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -521,16 +520,17 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
for
(
int
kernel_size
:
{
1
,
3
})
{
for
(
int
stride
:
{
1
})
{
for
(
int
stride
:
{
1
,
2
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
);
}
...
...
@@ -538,9 +538,97 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
64
,
128
});
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
64
,
128
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
,
5
,
7
});
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
});
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
3
+
(
rand
()
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
index_t
input_channels
=
shape
[
2
]
+
(
rand
()
%
10
);
index_t
output_channels
=
shape
[
3
]
+
(
rand
()
%
10
);
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
({
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
std
::
vector
<
half
>
input_data
(
float_input_data
.
begin
(),
float_input_data
.
end
());
std
::
vector
<
half
>
filter_data
(
float_filter_data
.
begin
(),
float_filter_data
.
end
());
std
::
vector
<
half
>
bias_data
(
float_bias_data
.
begin
(),
float_bias_data
.
end
());
net
.
AddInputFromArray
<
D
,
T
>
(
"InputHalf"
,
{
batch
,
height
,
width
,
input_channels
},
input_data
);
net
.
AddInputFromArray
<
D
,
T
>
(
"FilterHalf"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
filter_data
);
net
.
AddInputFromArray
<
D
,
T
>
(
"BiasHalf"
,
{
output_channels
},
bias_data
);
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"InputHalf"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"FilterHalf"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"BiasHalf"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
,
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1.0
);
};
for
(
int
kernel_size
:
{
3
})
{
for
(
int
stride
:
{
1
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
);
}
}
}
// TODO: support half input & float computation
//TEST_F(Conv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
// TestHalfComplexConvNxNS12<DeviceType::OPENCL, half>({32, 32, 64, 128});
//}
mace/ops/ops_test_util.h
浏览文件 @
29c3f0f7
...
...
@@ -210,13 +210,17 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
std
::
vector
<
T
>
&
res
)
{
std
::
random_device
rd
;
std
::
mt19937
gen
(
rd
());
std
::
normal_distribution
<
T
>
nd
(
0
,
1
);
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
index_t
size
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
());
res
.
resize
(
size
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
half_float
::
half_cast
<
half
>
(
nd
(
gen
));
});
}
else
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
nd
(
gen
);
});
}
}
template
<
typename
T
>
...
...
@@ -290,39 +294,40 @@ inline void ExpectEqual<double>(const double &a, const double &b) {
EXPECT_DOUBLE_EQ
(
a
,
b
);
}
inline
void
AssertSameTypeDims
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
ASSERT_EQ
(
x
.
dtype
(),
y
.
dtype
());
inline
void
AssertSameDims
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
ASSERT_TRUE
(
IsSameSize
(
x
,
y
))
<<
"x.shape ["
<<
ShapeToString
(
x
)
<<
"] vs "
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
}
template
<
typename
T
,
bool
is_fp
=
is_floating_point_type
<
T
>
::
value
>
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
,
bool
is_fp
=
is_floating_point_type
<
EXP_TYPE
>
::
value
>
struct
Expector
;
// Partial specialization for float and double.
template
<
typename
T
>
struct
Expector
<
T
,
true
>
{
static
void
Equal
(
const
T
&
a
,
const
T
&
b
)
{
ExpectEqual
(
a
,
b
);
}
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
>
struct
Expector
<
EXP_TYPE
,
RES_TYPE
,
true
>
{
static
void
Equal
(
const
EXP_TYPE
&
a
,
const
RES_TYPE
&
b
)
{
ExpectEqual
(
a
,
b
);
}
static
void
Equal
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
ASSERT_EQ
(
x
.
dtype
(),
DataTypeToEnum
<
T
>::
v
());
AssertSameTypeDims
(
x
,
y
);
ASSERT_EQ
(
x
.
dtype
(),
DataTypeToEnum
<
EXP_TYPE
>::
v
());
ASSERT_EQ
(
y
.
dtype
(),
DataTypeToEnum
<
RES_TYPE
>::
v
());
AssertSameDims
(
x
,
y
);
Tensor
::
MappingGuard
x_mapper
(
&
x
);
Tensor
::
MappingGuard
y_mapper
(
&
y
);
auto
a
=
x
.
data
<
T
>
();
auto
b
=
y
.
data
<
T
>
();
auto
a
=
x
.
data
<
EXP_TYPE
>
();
auto
b
=
y
.
data
<
RES_TYPE
>
();
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
ExpectEqual
(
a
(
i
),
b
(
i
));
}
}
static
void
Near
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
ASSERT_EQ
(
x
.
dtype
(),
DataTypeToEnum
<
T
>::
v
());
AssertSameTypeDims
(
x
,
y
);
ASSERT_EQ
(
x
.
dtype
(),
DataTypeToEnum
<
EXP_TYPE
>::
v
());
ASSERT_EQ
(
y
.
dtype
(),
DataTypeToEnum
<
RES_TYPE
>::
v
());
AssertSameDims
(
x
,
y
);
Tensor
::
MappingGuard
x_mapper
(
&
x
);
Tensor
::
MappingGuard
y_mapper
(
&
y
);
auto
a
=
x
.
data
<
T
>
();
auto
b
=
y
.
data
<
T
>
();
auto
a
=
x
.
data
<
EXP_TYPE
>
();
auto
b
=
y
.
data
<
RES_TYPE
>
();
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
EXPECT_NEAR
(
a
[
i
],
b
[
i
],
abs_err
)
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
...
...
@@ -335,10 +340,25 @@ template <typename T>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
T
>::
value
,
"T is not a floating point type"
);
Expector
<
T
>::
Near
(
x
,
y
,
abs_err
);
Expector
<
T
,
T
>::
Near
(
x
,
y
,
abs_err
);
}
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
EXP_TYPE
>::
value
&&
is_floating_point_type
<
RES_TYPE
>::
value
,
"T is not a floating point type"
);
Expector
<
EXP_TYPE
,
RES_TYPE
>::
Near
(
x
,
y
,
abs_err
);
}
template
<
typename
T
>
std
::
string
ToString
(
const
T
&
input
)
{
std
::
stringstream
ss
;
ss
<<
input
;
return
ss
.
str
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
,
typename
T
>
void
BufferToImage
(
OpsTestNet
&
net
,
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
...
...
@@ -347,6 +367,7 @@ void BufferToImage(OpsTestNet &net,
.
Input
(
input_name
)
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
...
...
@@ -355,7 +376,7 @@ void BufferToImage(OpsTestNet &net,
net
.
Sync
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
,
typename
T
>
void
ImageToBuffer
(
OpsTestNet
&
net
,
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
...
...
@@ -364,6 +385,7 @@ void ImageToBuffer(OpsTestNet &net,
.
Input
(
input_name
)
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录