Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
24c46a1a
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
24c46a1a
编写于
10月 28, 2019
作者:
叶
叶剑武
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support not equal strides in opencl conv
上级
c2c0a2d3
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
236 addition
and
81 deletion
+236
-81
mace/ops/common/conv_pool_2d_util.cc
mace/ops/common/conv_pool_2d_util.cc
+2
-1
mace/ops/opencl/cl/conv_2d.cl
mace/ops/opencl/cl/conv_2d.cl
+5
-4
mace/ops/opencl/cl/conv_2d_1x1.cl
mace/ops/opencl/cl/conv_2d_1x1.cl
+5
-4
mace/ops/opencl/cl/conv_2d_3x3.cl
mace/ops/opencl/cl/conv_2d_3x3.cl
+5
-4
mace/ops/opencl/image/conv_2d.cc
mace/ops/opencl/image/conv_2d.cc
+4
-2
mace/ops/opencl/image/conv_2d.h
mace/ops/opencl/image/conv_2d.h
+6
-3
mace/ops/opencl/image/conv_2d_1x1.cc
mace/ops/opencl/image/conv_2d_1x1.cc
+4
-2
mace/ops/opencl/image/conv_2d_3x3.cc
mace/ops/opencl/image/conv_2d_3x3.cc
+4
-2
mace/ops/opencl/image/conv_2d_general.cc
mace/ops/opencl/image/conv_2d_general.cc
+4
-2
test/ccunit/mace/ops/conv_2d_test.cc
test/ccunit/mace/ops/conv_2d_test.cc
+197
-57
未找到文件。
mace/ops/common/conv_pool_2d_util.cc
浏览文件 @
24c46a1a
...
...
@@ -76,7 +76,8 @@ void CalcPaddingAndOutputSize(const index_t *input_shape,
output_height
=
(
input_height
-
k_extent_height
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_width
-
k_extent_width
)
/
strides
[
1
]
+
1
;
break
;
case
SAME
:
output_height
=
(
input_height
-
1
)
/
strides
[
0
]
+
1
;
case
SAME
:
output_height
=
(
input_height
-
1
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_width
-
1
)
/
strides
[
1
]
+
1
;
break
;
case
FULL
:
...
...
mace/ops/opencl/cl/conv_2d.cl
浏览文件 @
24c46a1a
...
...
@@ -17,7 +17,8 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
__private
const
int
out_width,
__private
const
int
filter_height,
__private
const
int
filter_width,
__private
const
int
stride,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
padding_top,
__private
const
int
padding_left,
__private
const
int
dilation_h,
...
...
@@ -47,12 +48,12 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
DATA_TYPE4 out3 = 0;
#endif
int in_width_stride = mul24(out_w_blks, stride);
int in_width0 = mad24(out_w_blk, stride, -padding_left);
int in_width_stride = mul24(out_w_blks, stride
_w
);
int in_width0 = mad24(out_w_blk, stride
_w
, -padding_left);
int in_width1 = in_width0 + in_width_stride;
int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride;
const int height_start = mad24((out_hb % out_height), stride, -padding_top);
const int height_start = mad24((out_hb % out_height), stride
_h
, -padding_top);
int in_height_gap = select(
0,
(-height_start + dilation_h - 1) / dilation_h,
...
...
mace/ops/opencl/cl/conv_2d_1x1.cl
浏览文件 @
24c46a1a
...
...
@@ -15,7 +15,8 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
__private
const
int
in_ch_blks,
__private
const
int
height,
__private
const
int
width,
__private
const
int
stride
)
{
__private
const
int
stride_h,
__private
const
int
stride_w
)
{
const
int
out_ch_blk
=
get_global_id
(
0
)
;
const
int
out_w_blk
=
get_global_id
(
1
)
;
const
int
out_hb
=
get_global_id
(
2
)
;
...
...
@@ -41,14 +42,14 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
#endif
int4 w;
int in_width_stride = mul24(out_w_blks, stride);
w.x = mul24(out_w_blk, stride);
int in_width_stride = mul24(out_w_blks, stride
_w
);
w.x = mul24(out_w_blk, stride
_w
);
w.y = w.x + in_width_stride;
w.z = w.y + in_width_stride;
w.w = w.z + in_width_stride;
int batch = out_hb / height;
int h_idx = out_hb - mul24(batch, height);
int out_hb_idx = mul24(h_idx, stride);
int out_hb_idx = mul24(h_idx, stride
_h
);
w.x = select(w.x, INT_MIN, w.x >= in_width);
w.y = select(w.y, INT_MIN, w.y >= in_width);
...
...
mace/ops/opencl/cl/conv_2d_3x3.cl
浏览文件 @
24c46a1a
...
...
@@ -15,7 +15,8 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
__private
const
int
in_ch_blks,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
stride,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
padding_top,
__private
const
int
padding_left,
__private
const
int
dilation_h,
...
...
@@ -47,13 +48,13 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
DATA_TYPE4 out4 = 0;
#endif
int in_width_stride = mul24(out_w_blks, stride);
int in_width0 = mad24(out_w_blk, stride, -padding_left);
int in_width_stride = mul24(out_w_blks, stride
_w
);
int in_width0 = mad24(out_w_blk, stride
_w
, -padding_left);
int in_width1 = in_width0 + in_width_stride;
int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride;
int in_width4 = in_width3 + in_width_stride;
const int height_start = mad24((out_hb % out_height), stride, -padding_top);
const int height_start = mad24((out_hb % out_height), stride
_h
, -padding_top);
int in_height_gap = select(
0,
(-height_start + dilation_h - 1) / dilation_h,
...
...
mace/ops/opencl/image/conv_2d.cc
浏览文件 @
24c46a1a
...
...
@@ -73,8 +73,7 @@ MaceStatus Conv2dKernel::Compute(
Tensor
*
output
)
{
index_t
kernel_h
=
filter
->
dim
(
2
);
index_t
kernel_w
=
filter
->
dim
(
3
);
if
(
strides
[
0
]
!=
strides
[
1
]
||
(
dilations
[
0
]
>
1
&&
(
strides
[
0
]
>
1
||
kernel_h
==
1
)))
{
if
(
dilations
[
0
]
>
1
&&
(
strides
[
0
]
>
1
||
kernel_h
==
1
))
{
LOG
(
WARNING
)
<<
"OpenCL conv2d kernel with "
<<
"filter"
<<
kernel_h
<<
"x"
<<
kernel_w
<<
","
<<
" stride "
<<
strides
[
0
]
<<
"x"
<<
strides
[
1
]
...
...
@@ -131,6 +130,7 @@ MaceStatus Conv2dKernel::Compute(
filter
,
bias
,
strides
[
0
],
strides
[
1
],
paddings
.
data
(),
dilations
,
activation
,
...
...
@@ -148,6 +148,7 @@ MaceStatus Conv2dKernel::Compute(
filter
,
bias
,
strides
[
0
],
strides
[
1
],
paddings
.
data
(),
dilations
,
activation
,
...
...
@@ -165,6 +166,7 @@ MaceStatus Conv2dKernel::Compute(
filter
,
bias
,
strides
[
0
],
strides
[
1
],
paddings
.
data
(),
dilations
,
activation
,
...
...
mace/ops/opencl/image/conv_2d.h
浏览文件 @
24c46a1a
...
...
@@ -33,7 +33,8 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
@@ -48,7 +49,8 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
@@ -63,7 +65,8 @@ extern MaceStatus Conv2d(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
mace/ops/opencl/image/conv_2d_1x1.cc
浏览文件 @
24c46a1a
...
...
@@ -71,7 +71,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
@@ -170,7 +171,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input_channel_blocks
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel
->
setArg
(
idx
++
,
stride
);
kernel
->
setArg
(
idx
++
,
stride_h
);
kernel
->
setArg
(
idx
++
,
stride_w
);
*
prev_input_shape
=
input
->
shape
();
}
...
...
mace/ops/opencl/image/conv_2d_3x3.cc
浏览文件 @
24c46a1a
...
...
@@ -64,7 +64,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
@@ -154,7 +155,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input_channel_blocks
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
kernel
->
setArg
(
idx
++
,
stride
);
kernel
->
setArg
(
idx
++
,
stride_h
);
kernel
->
setArg
(
idx
++
,
stride_w
);
kernel
->
setArg
(
idx
++
,
padding
[
0
]
/
2
);
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
...
...
mace/ops/opencl/image/conv_2d_general.cc
浏览文件 @
24c46a1a
...
...
@@ -72,7 +72,8 @@ MaceStatus Conv2d(OpContext *context,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
stride_h
,
const
int
stride_w
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
...
...
@@ -164,7 +165,8 @@ MaceStatus Conv2d(OpContext *context,
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
width
));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
filter
->
dim
(
2
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
filter
->
dim
(
3
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
stride
));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
stride_h
));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
stride_w
));
kernel
->
setArg
(
idx
++
,
padding
[
0
]
/
2
);
kernel
->
setArg
(
idx
++
,
padding
[
1
]
/
2
);
kernel
->
setArg
(
idx
++
,
dilations
[
0
]);
...
...
test/ccunit/mace/ops/conv_2d_test.cc
浏览文件 @
24c46a1a
...
...
@@ -42,7 +42,8 @@ void TestNHWCSimple3x3VALID(int wino_blk_size = 0) {
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.1
f
},
true
);
const
std
::
vector
<
index_t
>
output_shape
=
{
1
,
1
,
1
,
1
};
...
...
@@ -100,7 +101,8 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.1
f
},
true
);
const
std
::
vector
<
index_t
>
output_shape
=
{
1
,
3
,
3
,
1
};
...
...
@@ -149,6 +151,65 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-3
,
1e-3
);
}
}
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3NeqStride
(
int
wino_blk_size
=
0
)
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
3
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.1
f
},
true
);
const
std
::
vector
<
index_t
>
output_shape
=
{
1
,
3
,
2
,
1
};
if
(
D
==
DeviceType
::
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
DataFormat
::
NHWC
,
"InputNCHW"
,
DataFormat
::
NCHW
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
1
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
DataFormat
::
NCHW
,
"Output"
,
DataFormat
::
NHWC
);
}
else
if
(
D
==
DeviceType
::
GPU
)
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
OutputShape
(
output_shape
)
.
AddIntsArg
(
"strides"
,
{
1
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"wino_block_size"
,
wino_blk_size
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
auto
expected
=
net
.
CreateTensor
<
float
>
(
output_shape
,
{
8.1
f
,
8.1
f
,
12.1
f
,
12.1
f
,
8.1
f
,
8.1
f
});
if
(
DataTypeToEnum
<
T
>::
value
==
DataType
::
DT_FLOAT
)
{
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
else
{
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-3
,
1e-3
);
}
}
}
// namespace
TEST_F
(
Conv2dOpTest
,
CPUSimple
)
{
...
...
@@ -171,7 +232,12 @@ TEST_F(Conv2dOpTest, OPENCLSimpleWinograd) {
TestNHWCSimple3x3VALID
<
DeviceType
::
GPU
,
float
>
(
2
);
TestNHWCSimple3x3VALID
<
DeviceType
::
GPU
,
half
>
(
2
);
// TODO(liutuo): the precision of the last value is not normal.
// TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
// TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
}
TEST_F
(
Conv2dOpTest
,
NotEqualStrideSimple
)
{
TestNHWCSimple3x3NeqStride
<
DeviceType
::
CPU
,
float
>
();
TestNHWCSimple3x3NeqStride
<
DeviceType
::
GPU
,
float
>
();
}
namespace
{
...
...
@@ -186,7 +252,8 @@ void TestNHWCSimple3x3WithoutBias() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
if
(
D
==
DeviceType
::
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -316,7 +383,8 @@ void TestFusedNHWCSimple3x3VALID(int wino_blk_size = 0) {
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
-
0.1
f
},
true
);
const
std
::
vector
<
index_t
>
output_shape
=
{
1
,
1
,
1
,
1
};
...
...
@@ -372,7 +440,8 @@ void TestFusedNHWCSimple3x3WithoutBias(int wino_blk_size = 0) {
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
},
true
);
const
std
::
vector
<
index_t
>
output_shape
=
{
1
,
1
,
1
,
1
};
if
(
D
==
DeviceType
::
CPU
)
{
...
...
@@ -502,17 +571,87 @@ void TestConv1x1() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
template
<
DeviceType
D
>
void
TestConv1x1NeqStride
()
{
// Construct graph
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
10
,
5
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
},
true
);
if
(
D
==
DeviceType
::
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
DataFormat
::
NHWC
,
"InputNCHW"
,
DataFormat
::
NCHW
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputNCHW"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"strides"
,
{
1
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
DataFormat
::
NCHW
,
"Output"
,
DataFormat
::
NHWC
);
}
else
if
(
D
==
DeviceType
::
GPU
)
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
1
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
// Check
auto
expected
=
net
.
CreateTensor
<
float
>
(
{
1
,
3
,
5
,
2
},
{
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
,
5.1
f
,
10.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
// namespace
TEST_F
(
Conv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
GPU
>
();
}
TEST_F
(
Conv2dOpTest
,
CPUConv1x1NotEqualStride
)
{
TestConv1x1NeqStride
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1NotEqualStride
)
{
TestConv1x1NeqStride
<
DeviceType
::
GPU
>
();
}
namespace
{
template
<
DeviceType
D
,
typename
T
>
void
TestComplexConvNxN
S12
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
stride
,
const
int
wino_blk_size
=
0
)
{
void
TestComplexConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
int
>
strides
,
const
int
wino_blk_size
=
0
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
...
...
@@ -581,35 +720,40 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
};
for
(
int
kernel_size
:
{
1
,
3
,
5
,
7
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
);
func
(
kernel_size
,
kernel_size
,
stride
s
[
0
],
strides
[
1
]
,
VALID
);
func
(
kernel_size
,
kernel_size
,
stride
s
[
0
],
strides
[
1
]
,
SAME
);
}
}
}
// namespace
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedConvNxNS12
)
{
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
1
);
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
2
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
{
1
,
1
}
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
{
2
,
2
}
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedConvNxNWinograd
)
{
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
1
,
2
);
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
1
,
4
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
{
1
,
1
}
,
2
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
16
,
16
,
32
},
{
1
,
1
}
,
4
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS12
)
{
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
1
);
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
2
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
{
1
,
1
}
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
{
2
,
2
}
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNWinograd
)
{
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
1
,
4
);
TestComplexConvNxN
S12
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
1
,
2
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
{
1
,
1
}
,
4
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
17
,
113
,
5
,
7
},
{
1
,
1
}
,
2
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS34
)
{
TestComplexConvNxNS12
<
DeviceType
::
GPU
,
float
>
({
31
,
113
,
13
,
17
},
3
);
TestComplexConvNxNS12
<
DeviceType
::
GPU
,
float
>
({
32
,
32
,
13
,
17
},
4
);
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
31
,
113
,
13
,
17
},
{
3
,
3
});
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
32
,
13
,
17
},
{
4
,
4
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNWithNotEqualStride
)
{
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
31
,
113
,
13
,
17
},
{
1
,
2
});
TestComplexConvNxN
<
DeviceType
::
GPU
,
float
>
({
32
,
32
,
13
,
17
},
{
3
,
4
});
}
namespace
{
...
...
@@ -646,9 +790,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
output_channels
,
input_channels
,
kernel_h
,
kernel_w
},
float_filter_data
,
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
,
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
,
true
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -713,11 +855,11 @@ TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv3x3Winograd
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
GPU
>
({
32
,
32
},
{
3
,
3
,
32
,
64
},
{
1
,
1
},
2
);
// TODO(liutuo) : the precision error is large.
// TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64},
// {1, 1}, 4);
TestHalfComplexConvNxNS12
<
DeviceType
::
GPU
>
({
32
,
32
},
{
3
,
3
,
32
,
64
},
{
1
,
1
},
2
);
// TODO(liutuo) : the precision error is large.
// TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64},
// {1, 1}, 4);
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv5x5S12
)
{
...
...
@@ -769,10 +911,10 @@ TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
TEST_F
(
Conv2dOpTest
,
OPENCLHalfUnalignedConv3x3Winograd
)
{
// TODO(liutuo) : the precision error is large.
// TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7},
// {1, 1}, 4);
TestHalfComplexConvNxNS12
<
DeviceType
::
GPU
>
({
107
,
113
},
{
3
,
3
,
5
,
7
},
{
1
,
1
},
2
);
// TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7},
// {1, 1}, 4);
TestHalfComplexConvNxNS12
<
DeviceType
::
GPU
>
({
107
,
113
},
{
3
,
3
,
5
,
7
},
{
1
,
1
},
2
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfConv5x5Dilation2
)
{
...
...
@@ -1045,10 +1187,10 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedPad2Winograd
)
{
TestArbitraryPadConvNxN
<
DeviceType
::
GPU
,
float
>
({
128
,
128
,
16
,
16
},
{
2
,
2
},
2
);
TestArbitraryPadConvNxN
<
DeviceType
::
GPU
,
float
>
({
128
,
128
,
16
,
16
},
{
2
,
2
},
4
);
TestArbitraryPadConvNxN
<
DeviceType
::
GPU
,
float
>
({
128
,
128
,
16
,
16
},
{
2
,
2
},
2
);
TestArbitraryPadConvNxN
<
DeviceType
::
GPU
,
float
>
({
128
,
128
,
16
,
16
},
{
2
,
2
},
4
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedPad4
)
{
...
...
@@ -1071,15 +1213,15 @@ void TestQuantSimple3x3() {
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
uint8_t
>
(
"Filter"
,
{
1
,
3
,
3
,
2
},
{
102
,
150
,
123
,
135
,
1
,
216
,
137
,
47
,
53
,
75
,
145
,
130
,
171
,
62
,
255
,
122
,
72
,
211
},
true
,
0.0226
,
127
);
"Filter"
,
{
1
,
3
,
3
,
2
},
{
102
,
150
,
123
,
135
,
1
,
216
,
137
,
47
,
53
,
75
,
145
,
130
,
171
,
62
,
255
,
122
,
72
,
211
}
,
true
,
0.0226
,
127
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
uint8_t
>
(
"Input"
,
{
1
,
3
,
3
,
2
},
{
1
,
75
,
117
,
161
,
127
,
119
,
94
,
151
,
203
,
151
,
84
,
61
,
55
,
142
,
113
,
139
,
3
,
255
},
false
,
0.0204
,
93
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
"Bias"
,
{
1
},
{
2
},
true
,
0.00046104
,
0
);
"Input"
,
{
1
,
3
,
3
,
2
},
{
1
,
75
,
117
,
161
,
127
,
119
,
94
,
151
,
203
,
151
,
84
,
61
,
55
,
142
,
113
,
139
,
3
,
255
}
,
false
,
0.0204
,
93
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
"Bias"
,
{
1
},
{
2
},
true
,
0.00046104
,
0
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
...
...
@@ -1113,17 +1255,15 @@ void TestQuant(const index_t batch,
enum
Padding
padding_type
,
const
std
::
vector
<
int
>
&
strides
)
{
OpsTestNet
net
;
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
{
batch
,
in_height
,
in_width
,
in_channels
});
net
.
AddRandomInput
<
CPU
,
float
>
(
"Filter"
,
{
out_channels
,
k_height
,
k_width
,
in_channels
},
true
);
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
{
batch
,
in_height
,
in_width
,
in_channels
});
net
.
AddRandomInput
<
CPU
,
float
>
(
"Filter"
,
{
out_channels
,
k_height
,
k_width
,
in_channels
},
true
);
net
.
AddRandomInput
<
CPU
,
float
>
(
"Bias"
,
{
out_channels
},
true
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
DataFormat
::
NHWC
,
"InputNCHW"
,
DataFormat
::
NCHW
);
net
.
TransformFilterDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Filter"
,
DataFormat
::
OHWI
,
"FilterOIHW"
,
DataFormat
::
OIHW
);
net
.
TransformFilterDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Filter"
,
DataFormat
::
OHWI
,
"FilterOIHW"
,
DataFormat
::
OIHW
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputNCHW"
)
...
...
@@ -1172,10 +1312,10 @@ void TestQuant(const index_t batch,
auto
bias_data
=
bias
->
data
<
float
>
();
float
bias_scale
=
q_input
->
scale
()
*
q_filter
->
scale
();
std
::
vector
<
int32_t
>
q_bias
(
bias
->
size
());
QuantizeUtil
<
float
,
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
quantize_util
.
QuantizeWithScaleAndZeropoint
(
bias_data
,
bias
->
size
(),
bias_scale
,
0
,
q_bias
.
data
());
QuantizeUtil
<
float
,
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
quantize_util
.
QuantizeWithScaleAndZeropoint
(
bias_data
,
bias
->
size
(),
bias_scale
,
0
,
q_bias
.
data
());
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
"QuantizedBias"
,
{
out_channels
},
q_bias
,
true
,
bias_scale
,
0
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录