Xiaomi / Mace

Commit 4df34723
Authored May 08, 2018 by 李寅

Merge branch 'feature_wuch' into 'master'

fix memory invalid read

See merge request !453

Parents: 2821ff2e 85258d86
Showing 5 changed files with 119 additions and 106 deletions (+119 -106)
mace/core/tensor.h                               +10  -2
mace/kernels/arm/conv_2d_neon_3x3.cc             +18  -18
mace/kernels/arm/depthwise_conv2d_neon_3x3.cc    +20  -20
mace/kernels/conv_2d.h                           +5   -4
mace/ops/depthwise_conv2d_test.cc                +66  -62
mace/core/tensor.h

@@ -28,6 +28,13 @@
 #include "mace/public/mace.h"
 #include "mace/utils/logging.h"
 
+#ifdef MACE_ENABLE_NEON
+// Avoid over-bound accessing memory
+#define EXTRA_BUFFER_PAD_SIZE 64
+#else
+#define EXTRA_BUFFER_PAD_SIZE 0
+#endif
+
 namespace mace {
 
 #define SINGLE_ARG(...) __VA_ARGS__

@@ -212,10 +219,11 @@ class Tensor {
     image_shape_.clear();
     if (buffer_ != nullptr) {
       MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
-      if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
+      if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size())
+        buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE);
     } else {
       MACE_CHECK(is_buffer_owner_);
-      buffer_ = new Buffer(allocator_, raw_size());
+      buffer_ = new Buffer(allocator_, raw_size() + EXTRA_BUFFER_PAD_SIZE);
     }
   }
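The EXTRA_BUFFER_PAD_SIZE macro added above is the allocation-side half of this fix: the NEON kernels read data in 128-bit chunks, so a 4-float load that starts near the end of an exactly-sized tensor buffer can touch bytes past the allocation. Padding every buffer by 64 bytes when NEON is enabled keeps such over-reads inside owned memory. A minimal sketch of the idea in plain C++ (not MACE code; names are illustrative):

// Sketch: why exactly-sized buffers break 4-wide vector loads, and what the pad buys.
#include <cstring>
#include <vector>

int main() {
  float lanes[4];

  // A 3x3 filter holds 9 floats. A 16-byte load starting at element 6 reads
  // elements 6, 7, 8 and 9; element 9 is past the end of this allocation.
  std::vector<float> filter_exact(9, 1.0f);
  // std::memcpy(lanes, filter_exact.data() + 6, sizeof(lanes));  // invalid read of element 9

  // With the allocation padded (MACE adds 64 bytes when NEON is enabled), the
  // same wide read overshoots the logical tensor data but stays in bounds.
  std::vector<float> filter_padded(9 + 64 / sizeof(float), 1.0f);
  std::memcpy(lanes, filter_padded.data() + 6, sizeof(lanes));  // in bounds
  return lanes[0] == 1.0f ? 0 : 1;
}

A checker such as AddressSanitizer or Valgrind reports the commented-out read over the exactly-sized buffer as an invalid read, which is presumably the "memory invalid read" this merge request fixes; the padded read is clean.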
mace/kernels/arm/conv_2d_neon_3x3.cc

@@ -334,7 +334,7 @@ void Conv2dNeonK3x3S1(const float *input,
         float32x4_t vf00, vf01, vf02;
         vf00 = vld1q_f32(filter_ptr0);
         vf01 = vld1q_f32(filter_ptr0 + 3);
-        vf02 = vld1q_f32(filter_ptr0 + 6);
+        vf02 = vld1q_f32(filter_ptr0 + 5);
 
         for (index_t h = 0; h + 1 < out_height; h += 2) {
           for (index_t w = 0; w + 3 < out_width; w += 4) {

@@ -377,9 +377,9 @@ void Conv2dNeonK3x3S1(const float *input,
             vo00 = vfmaq_laneq_f32(vo00, vi10, vf01, 0);
             vo00 = vfmaq_laneq_f32(vo00, vi11, vf01, 1);
             vo00 = vfmaq_laneq_f32(vo00, vi12, vf01, 2);
-            vo00 = vfmaq_laneq_f32(vo00, vi20, vf02, 0);
-            vo00 = vfmaq_laneq_f32(vo00, vi21, vf02, 1);
-            vo00 = vfmaq_laneq_f32(vo00, vi22, vf02, 2);
+            vo00 = vfmaq_laneq_f32(vo00, vi20, vf02, 1);
+            vo00 = vfmaq_laneq_f32(vo00, vi21, vf02, 2);
+            vo00 = vfmaq_laneq_f32(vo00, vi22, vf02, 3);
             // outch 0, height 1
             vo01 = vfmaq_laneq_f32(vo01, vi10, vf00, 0);

@@ -388,9 +388,9 @@ void Conv2dNeonK3x3S1(const float *input,
             vo01 = vfmaq_laneq_f32(vo01, vi20, vf01, 0);
             vo01 = vfmaq_laneq_f32(vo01, vi21, vf01, 1);
             vo01 = vfmaq_laneq_f32(vo01, vi22, vf01, 2);
-            vo01 = vfmaq_laneq_f32(vo01, vi30, vf02, 0);
-            vo01 = vfmaq_laneq_f32(vo01, vi31, vf02, 1);
-            vo01 = vfmaq_laneq_f32(vo01, vi32, vf02, 2);
+            vo01 = vfmaq_laneq_f32(vo01, vi30, vf02, 1);
+            vo01 = vfmaq_laneq_f32(vo01, vi31, vf02, 2);
+            vo01 = vfmaq_laneq_f32(vo01, vi32, vf02, 3);
 
             vst1q_f32(out_ptr0, vo00);
             vst1q_f32(out_ptr0 + out_width, vo01);

@@ -413,12 +413,12 @@ void Conv2dNeonK3x3S1(const float *input,
         }  // h
 #elif defined(MACE_ENABLE_NEON)  // arm v7
         // load filter (1 outch x 3 height x 3 width): vf_outch_height
-        float32x2_t vf01, vf23, vf45, vf67, vf89;
+        float32x2_t vf01, vf23, vf45, vf67, vf78;
         vf01 = vld1_f32(filter_ptr0);
         vf23 = vld1_f32(filter_ptr0 + 2);
         vf45 = vld1_f32(filter_ptr0 + 4);
         vf67 = vld1_f32(filter_ptr0 + 6);
-        vf89 = vld1_f32(filter_ptr0 + 8);
+        vf78 = vld1_f32(filter_ptr0 + 7);
 
         for (index_t h = 0; h + 1 < out_height; h += 2) {
           for (index_t w = 0; w + 3 < out_width; w += 4) {

@@ -463,7 +463,7 @@ void Conv2dNeonK3x3S1(const float *input,
             vo00 = vmlaq_lane_f32(vo00, vi12, vf45, 1);
             vo00 = vmlaq_lane_f32(vo00, vi20, vf67, 0);
             vo00 = vmlaq_lane_f32(vo00, vi21, vf67, 1);
-            vo00 = vmlaq_lane_f32(vo00, vi22, vf89, 0);
+            vo00 = vmlaq_lane_f32(vo00, vi22, vf78, 1);
             // outch 0, height 1
             vo01 = vmlaq_lane_f32(vo01, vi10, vf01, 0);

@@ -474,7 +474,7 @@ void Conv2dNeonK3x3S1(const float *input,
             vo01 = vmlaq_lane_f32(vo01, vi22, vf45, 1);
             vo01 = vmlaq_lane_f32(vo01, vi30, vf67, 0);
             vo01 = vmlaq_lane_f32(vo01, vi31, vf67, 1);
-            vo01 = vmlaq_lane_f32(vo01, vi32, vf89, 0);
+            vo01 = vmlaq_lane_f32(vo01, vi32, vf78, 1);
 
             vst1q_f32(out_ptr0, vo00);
             vst1q_f32(out_ptr0 + out_width, vo01);

@@ -544,7 +544,7 @@ void Conv2dNeonK3x3S2(const float *input,
     float32x4_t vf00, vf01, vf02;
     vf00 = vld1q_f32(filter_ptr);
     vf01 = vld1q_f32(filter_ptr + 3);
-    vf02 = vld1q_f32(filter_ptr + 6);
+    vf02 = vld1q_f32(filter_ptr + 5);
 
     for (index_t h = 0; h < out_height; ++h) {
       for (index_t w = 0; w + 3 < out_width; w += 4) {

@@ -592,21 +592,21 @@ void Conv2dNeonK3x3S2(const float *input,
         vo = vfmaq_laneq_f32(vo, vi10, vf01, 0);
         vo = vfmaq_laneq_f32(vo, vi11, vf01, 1);
         vo = vfmaq_laneq_f32(vo, vi12, vf01, 2);
-        vo = vfmaq_laneq_f32(vo, vi20, vf02, 0);
-        vo = vfmaq_laneq_f32(vo, vi21, vf02, 1);
-        vo = vfmaq_laneq_f32(vo, vi22, vf02, 2);
+        vo = vfmaq_laneq_f32(vo, vi20, vf02, 1);
+        vo = vfmaq_laneq_f32(vo, vi21, vf02, 2);
+        vo = vfmaq_laneq_f32(vo, vi22, vf02, 3);
 
         vst1q_f32(out_base + out_offset, vo);
       }  // w
     }  // h
 #elif defined(MACE_ENABLE_NEON)  // arm v7
     // load filter (1 outch x 3 height x 3 width): vf_outch_height
-    float32x2_t vf01, vf23, vf45, vf67, vf89;
+    float32x2_t vf01, vf23, vf45, vf67, vf78;
     vf01 = vld1_f32(filter_ptr);
     vf23 = vld1_f32(filter_ptr + 2);
     vf45 = vld1_f32(filter_ptr + 4);
     vf67 = vld1_f32(filter_ptr + 6);
-    vf89 = vld1_f32(filter_ptr + 8);
+    vf78 = vld1_f32(filter_ptr + 7);
 
     for (index_t h = 0; h < out_height; ++h) {
       for (index_t w = 0; w + 3 < out_width; w += 4) {

@@ -656,7 +656,7 @@ void Conv2dNeonK3x3S2(const float *input,
         vo = vmlaq_lane_f32(vo, vi12, vf45, 1);
         vo = vmlaq_lane_f32(vo, vi20, vf67, 0);
         vo = vmlaq_lane_f32(vo, vi21, vf67, 1);
-        vo = vmlaq_lane_f32(vo, vi22, vf89, 0);
+        vo = vmlaq_lane_f32(vo, vi22, vf78, 1);
 
         vst1q_f32(out_base + out_offset, vo);
       }  // w
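The kernel-side half of the fix is a lane remap rather than an extra load. A row-major 3x3 filter has taps 0 through 8, so vld1q_f32(filter_ptr0 + 6) pulls taps 6, 7, 8 plus a nonexistent tap 9. Loading the last row from filter_ptr0 + 5 keeps the 4-float load inside the filter, and every lane index that referenced that vector moves up by one, so the arithmetic is unchanged; the ARMv7 path does the same by replacing vf89 (loaded from + 8, which also reads one float too far) with vf78 loaded from + 7. A stand-alone check of the remap (plain C++ stand-in for the NEON load, hypothetical array, not the kernel itself):

// Sketch: old load at +6 with lanes 0..2 and new load at +5 with lanes 1..3
// pick exactly the same filter taps 6, 7, 8.
#include <cassert>

int main() {
  float filter[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, -1};  // [9] stands in for memory past the filter
  const float *old_vf02 = filter + 6;  // lanes 0,1,2 -> taps 6,7,8 (lane 3 would be the invalid read)
  const float *new_vf02 = filter + 5;  // lanes 1,2,3 -> taps 6,7,8 (lane 0 simply unused)
  for (int lane = 0; lane < 3; ++lane) {
    assert(old_vf02[lane] == new_vf02[lane + 1]);
  }
  return 0;
}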
mace/kernels/arm/depthwise_conv2d_neon_3x3.cc

@@ -104,7 +104,7 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
   float32x4_t vf00, vf01, vf02;
   vf00 = vld1q_f32(filter_ptr);
   vf01 = vld1q_f32(filter_ptr + 3);
-  vf02 = vld1q_f32(filter_ptr + 6);
+  vf02 = vld1q_f32(filter_ptr + 5);
 
   for (h = valid_h_start; h + 1 < valid_h_stop; h += 2) {
     // left

@@ -180,9 +180,9 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
       vo00 = vfmaq_laneq_f32(vo00, vi10, vf01, 0);
       vo00 = vfmaq_laneq_f32(vo00, vi11, vf01, 1);
       vo00 = vfmaq_laneq_f32(vo00, vi12, vf01, 2);
-      vo00 = vfmaq_laneq_f32(vo00, vi20, vf02, 0);
-      vo00 = vfmaq_laneq_f32(vo00, vi21, vf02, 1);
-      vo00 = vfmaq_laneq_f32(vo00, vi22, vf02, 2);
+      vo00 = vfmaq_laneq_f32(vo00, vi20, vf02, 1);
+      vo00 = vfmaq_laneq_f32(vo00, vi21, vf02, 2);
+      vo00 = vfmaq_laneq_f32(vo00, vi22, vf02, 3);
       // outch 0, height 1
       vo01 = vfmaq_laneq_f32(vo01, vi10, vf00, 0);

@@ -191,9 +191,9 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
       vo01 = vfmaq_laneq_f32(vo01, vi20, vf01, 0);
       vo01 = vfmaq_laneq_f32(vo01, vi21, vf01, 1);
       vo01 = vfmaq_laneq_f32(vo01, vi22, vf01, 2);
-      vo01 = vfmaq_laneq_f32(vo01, vi30, vf02, 0);
-      vo01 = vfmaq_laneq_f32(vo01, vi31, vf02, 1);
-      vo01 = vfmaq_laneq_f32(vo01, vi32, vf02, 2);
+      vo01 = vfmaq_laneq_f32(vo01, vi30, vf02, 1);
+      vo01 = vfmaq_laneq_f32(vo01, vi31, vf02, 2);
+      vo01 = vfmaq_laneq_f32(vo01, vi32, vf02, 3);
 #else
       // outch 0, height 0
       vo00 = vmlaq_lane_f32(vo00, vi00, vget_low_f32(vf00), 0);

@@ -202,9 +202,9 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
       vo00 = vmlaq_lane_f32(vo00, vi10, vget_low_f32(vf01), 0);
       vo00 = vmlaq_lane_f32(vo00, vi11, vget_low_f32(vf01), 1);
       vo00 = vmlaq_lane_f32(vo00, vi12, vget_high_f32(vf01), 0);
-      vo00 = vmlaq_lane_f32(vo00, vi20, vget_low_f32(vf02), 0);
-      vo00 = vmlaq_lane_f32(vo00, vi21, vget_low_f32(vf02), 1);
-      vo00 = vmlaq_lane_f32(vo00, vi22, vget_high_f32(vf02), 0);
+      vo00 = vmlaq_lane_f32(vo00, vi20, vget_low_f32(vf02), 1);
+      vo00 = vmlaq_lane_f32(vo00, vi21, vget_high_f32(vf02), 0);
+      vo00 = vmlaq_lane_f32(vo00, vi22, vget_high_f32(vf02), 1);
       // outch 0, height 1
       vo01 = vmlaq_lane_f32(vo01, vi10, vget_low_f32(vf00), 0);

@@ -213,9 +213,9 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
       vo01 = vmlaq_lane_f32(vo01, vi20, vget_low_f32(vf01), 0);
      vo01 = vmlaq_lane_f32(vo01, vi21, vget_low_f32(vf01), 1);
      vo01 = vmlaq_lane_f32(vo01, vi22, vget_high_f32(vf01), 0);
-      vo01 = vmlaq_lane_f32(vo01, vi30, vget_low_f32(vf02), 0);
-      vo01 = vmlaq_lane_f32(vo01, vi31, vget_low_f32(vf02), 1);
-      vo01 = vmlaq_lane_f32(vo01, vi32, vget_high_f32(vf02), 0);
+      vo01 = vmlaq_lane_f32(vo01, vi30, vget_low_f32(vf02), 1);
+      vo01 = vmlaq_lane_f32(vo01, vi31, vget_high_f32(vf02), 0);
+      vo01 = vmlaq_lane_f32(vo01, vi32, vget_high_f32(vf02), 1);
 #endif
       vst1q_f32(out_base + out_offset, vo00);
       vst1q_f32(out_base + out_offset + out_width, vo01);

@@ -344,7 +344,7 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
   float32x4_t vf00, vf01, vf02;
   vf00 = vld1q_f32(filter_ptr);
   vf01 = vld1q_f32(filter_ptr + 3);
-  vf02 = vld1q_f32(filter_ptr + 6);
+  vf02 = vld1q_f32(filter_ptr + 5);
 
   for (h = valid_h_start; h < valid_h_stop; ++h) {
     // left

@@ -409,9 +409,9 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
       vo = vfmaq_laneq_f32(vo, vi10, vf01, 0);
       vo = vfmaq_laneq_f32(vo, vi11, vf01, 1);
       vo = vfmaq_laneq_f32(vo, vi12, vf01, 2);
-      vo = vfmaq_laneq_f32(vo, vi20, vf02, 0);
-      vo = vfmaq_laneq_f32(vo, vi21, vf02, 1);
-      vo = vfmaq_laneq_f32(vo, vi22, vf02, 2);
+      vo = vfmaq_laneq_f32(vo, vi20, vf02, 1);
+      vo = vfmaq_laneq_f32(vo, vi21, vf02, 2);
+      vo = vfmaq_laneq_f32(vo, vi22, vf02, 3);
 #else
       // outch 0, height 0
       vo = vmlaq_lane_f32(vo, vi00, vget_low_f32(vf00), 0);

@@ -420,9 +420,9 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
       vo = vmlaq_lane_f32(vo, vi10, vget_low_f32(vf01), 0);
       vo = vmlaq_lane_f32(vo, vi11, vget_low_f32(vf01), 1);
       vo = vmlaq_lane_f32(vo, vi12, vget_high_f32(vf01), 0);
-      vo = vmlaq_lane_f32(vo, vi20, vget_low_f32(vf02), 0);
-      vo = vmlaq_lane_f32(vo, vi21, vget_low_f32(vf02), 1);
-      vo = vmlaq_lane_f32(vo, vi22, vget_high_f32(vf02), 0);
+      vo = vmlaq_lane_f32(vo, vi20, vget_low_f32(vf02), 1);
+      vo = vmlaq_lane_f32(vo, vi21, vget_high_f32(vf02), 0);
+      vo = vmlaq_lane_f32(vo, vi22, vget_high_f32(vf02), 1);
 #endif
       vst1q_f32(out_base + out_offset, vo);
     }  // w
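The depthwise kernels get the same remap. The only wrinkle is the ARMv7 fallback, where vf02 is consumed through its vget_low_f32/vget_high_f32 halves, so moving the load to filter_ptr + 5 also moves which half and lane hold each tap: tap 6 becomes lane 1 of the low half, and taps 7 and 8 become lanes 0 and 1 of the high half. A sketch of that mapping (plain C++ stand-in for the NEON halves, not the kernel itself):

// Sketch: with vf02 loaded from filter + 5, the vector holds taps {5, 6, 7, 8};
// its low half is {5, 6} and its high half is {7, 8}.
#include <cassert>

int main() {
  float filter[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  const float *vf02 = filter + 5;              // models vld1q_f32(filter_ptr + 5)
  const float *low = vf02, *high = vf02 + 2;   // models vget_low_f32 / vget_high_f32
  assert(low[1] == filter[6]);    // tap 6: low half, lane 1
  assert(high[0] == filter[7]);   // tap 7: high half, lane 0
  assert(high[1] == filter[8]);   // tap 8: high half, lane 1
  return 0;
}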
mace/kernels/conv_2d.h

@@ -461,7 +461,8 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
         || extra_input_width != input_width) {
       padded_input_size =
           batch * input_channels * (input_height + pad_top + pad_bottom)
-              * (input_width + pad_left + pad_right) * sizeof(float);
+              * (input_width + pad_left + pad_right) * sizeof(float)
+          + EXTRA_BUFFER_PAD_SIZE;
       total_scratch_size += padded_input_size;
     }
     if (extra_output_height != height || extra_output_width != width) {

@@ -482,8 +483,8 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
     // decide which convolution function to call
     if (use_winograd) {
-      transformed_input.Resize(transformed_input_shape);
-      transformed_output.Resize(transformed_output_shape);
+      transformed_input.Reshape(transformed_input_shape);
+      transformed_output.Reshape(transformed_output_shape);
       const float *transformed_filter_ptr;
       if (transformed_filter_.dim_size() == 0) {
         if (is_filter_transformed_) {

@@ -652,7 +653,7 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
     Tensor *pad_output_ptr = output;
     if (extra_output_height != height || extra_output_width != width) {
-      padded_output.Resize({batch, channels, extra_output_height,
+      padded_output.Reshape({batch, channels, extra_output_height,
                             extra_output_width});
       padded_output.Clear();
       pad_output_ptr = &padded_output;
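Two things change in this file. The scratch space reserved for the padded input grows by the same EXTRA_BUFFER_PAD_SIZE used in Tensor, so vector loads near the end of that region also stay in bounds; and the tensors that sit on pre-allocated scratch memory switch from Resize to Reshape, which in the MACE Tensor API appears to update the shape without growing or reallocating the underlying buffer. A sketch of the size computation only (illustrative helper name, not MACE code):

// Sketch: the padded-input scratch size now reserves the NEON over-read pad.
#include <cstddef>

constexpr std::size_t kExtraBufferPadSize = 64;  // matches MACE's NEON value

std::size_t PaddedInputBytes(std::size_t batch, std::size_t channels,
                             std::size_t height, std::size_t width,
                             std::size_t pad_top, std::size_t pad_bottom,
                             std::size_t pad_left, std::size_t pad_right) {
  return batch * channels * (height + pad_top + pad_bottom) *
             (width + pad_left + pad_right) * sizeof(float) +
         kExtraBufferPadSize;
}

The pad is a flat 64 bytes, negligible next to the payload but larger than the 16-byte vld1q_f32 loads the kernels issue.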
mace/ops/depthwise_conv2d_test.cc

@@ -114,50 +114,28 @@ TEST_F(DepthwiseConv2dOpTest, SimpleOpenCLHalf) {
 namespace {
 template <DeviceType D, typename T>
-void ComplexValidTest() {
+void ComplexValidTest(index_t batch, index_t channel, index_t height,
+                      index_t width, index_t kernel, index_t multiplier,
+                      int stride) {
   testing::internal::LogToStderr();
   // Construct graph
   OpsTestNet net;
 
   // Add input data
-  net.AddInputFromArray<D, float>(
-      "Input", {1, 10, 10, 3},
-      {0.0, 0.01, 0.02, /* ... 300 values in steps of 0.01 ... */ 2.98, 2.99});
-  net.AddInputFromArray<D, float>(
-      "Filter", {5, 5, 3, 1},
-      {0.0, 0.01, 0.02, /* ... 75 values in steps of 0.01 ... */ 0.73, 0.74});
-  net.AddInputFromArray<D, float>(
-      "Bias", {6}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
+  std::vector<float> input_data(batch * height * width * channel);
+  GenerateRandomRealTypeData({batch, height, width, channel}, &input_data);
+  net.AddInputFromArray<D, float>("Input", {batch, height, width, channel},
+                                  input_data);
+  std::vector<float> filter_data(kernel * kernel * channel * multiplier);
+  GenerateRandomRealTypeData({kernel, kernel, channel, multiplier},
+                             &filter_data);
+  net.AddInputFromArray<D, float>(
+      "Filter", {kernel, kernel, channel, multiplier}, filter_data);
+  std::vector<float> bias_data(channel * multiplier);
+  GenerateRandomRealTypeData({channel * multiplier}, &bias_data);
+  net.AddInputFromArray<D, float>("Bias", {channel * multiplier}, bias_data);
 
   if (D == DeviceType::CPU) {
     net.TransformDataFormat<DeviceType::CPU, float>("Input",

@@ -173,7 +151,7 @@ void ComplexValidTest() {
         .Input("FilterOIHW")
         .Input("Bias")
         .Output("OutputNCHW")
-        .AddIntsArg("strides", {2, 2})
+        .AddIntsArg("strides", {stride, stride})
         .AddIntArg("padding", Padding::SAME)
         .AddIntsArg("dilations", {1, 1})
         .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))

@@ -196,7 +174,7 @@ void ComplexValidTest() {
         .Input("FilterImage")
         .Input("BiasImage")
         .Output("OutputImage")
-        .AddIntsArg("strides", {2, 2})
+        .AddIntsArg("strides", {stride, stride})
         .AddIntArg("padding", Padding::SAME)
         .AddIntsArg("dilations", {1, 1})
         .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))

@@ -212,25 +190,43 @@ void ComplexValidTest() {
     MACE_NOT_IMPLEMENTED;
   }
 
-  // Check
+  // expect
+  index_t out_height = (height - 1) / stride + 1;
+  index_t out_width = (width - 1) / stride + 1;
+  index_t pad_top = ((out_height - 1) * stride + kernel - height) >> 1;
+  index_t pad_left = ((out_width - 1) * stride + kernel - width) >> 1;
+  index_t out_channels = channel * multiplier;
+  std::vector<T> expect(batch * out_height * out_width * out_channels);
+  for (index_t b = 0; b < batch; ++b) {
+    for (index_t h = 0; h < out_height; ++h) {
+      for (index_t w = 0; w < out_width; ++w) {
+        for (index_t m = 0; m < out_channels; ++m) {
+          index_t out_offset =
+              ((b * out_height + h) * out_width + w) * out_channels + m;
+          index_t c = m / multiplier;
+          index_t o = m % multiplier;
+          float sum = 0;
+          for (index_t kh = 0; kh < kernel; ++kh) {
+            for (index_t kw = 0; kw < kernel; ++kw) {
+              index_t ih = h * stride - pad_top + kh;
+              index_t iw = w * stride - pad_left + kw;
+              if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
+                index_t in_offset =
+                    ((b * height + ih) * width + iw) * channel + c;
+                index_t filter_offset =
+                    (((kh * kernel) + kw) * channel + c) * multiplier + o;
+                sum += input_data[in_offset] * filter_data[filter_offset];
+              }
+            }
+          }
+          expect[out_offset] = sum + bias_data[m];
+        }
+      }
+    }
+  }
 
   auto expected = CreateTensor<T>(
-      {1, 5, 5, 3},
-      VectorStaticCast<T>(
-          {4.48200035,  4.63479996,  4.79079962,  5.85899973,  6.05599976,
-           6.25699997,  6.38100004,  6.59000015,  6.80300045,  6.90299988,
-           7.1239996,   7.34899998,  4.03559971,  4.16820002,  4.30319977,
-           8.90999985,  9.1760006,   9.44599915,  11.20499992, 11.54500103,
-           11.89000034, 11.74499989, 12.09999943, 12.46000004, 12.28499985,
-           12.65500069, 13.03000069, 7.00200033,  7.22399998,  7.44900036,
-           13.4100008,  13.79599953, 14.18599987, 16.60500145, 17.09499741,
-           17.59000015, 17.14500046, 17.65000153, 18.15999794, 17.68499947,
-           18.20499992, 18.72999954, 9.97200012,  10.28399944, 10.59899998,
-           17.90999985, 18.41600037, 18.92599869, 22.00500107, 22.64500046,
-           23.28999901, 22.54500008, 23.19999886, 23.8599987,  23.0850029,
-           23.75500107, 24.43000031, 12.94200039, 13.34400082, 13.7489996,
-           6.97500038,  7.29659986,  7.62060022,  8.32049942,  8.72700024,
-           9.13650036,  8.5095005,   8.92500019,  9.34349918,  8.69849968,
-           9.12300014,  9.55049992,  4.55220032,  4.80690002,  5.06340027}));
+      {1, out_height, out_width, out_channels}, expect);
 
   if (DataTypeToEnum<T>::value == DT_FLOAT) {
     ExpectTensorNear<T>(*expected, *net.GetOutput("Output"), 1e-5);

@@ -241,15 +237,23 @@ void ComplexValidTest() {
 }  // namespace
 
 TEST_F(DepthwiseConv2dOpTest, ComplexCPU) {
-  ComplexValidTest<DeviceType::CPU, float>();
+  ComplexValidTest<DeviceType::CPU, float>(1, 3, 10, 10, 5, 1, 2);
+}
+
+TEST_F(DepthwiseConv2dOpTest, ComplexCPU3x3s1) {
+  ComplexValidTest<DeviceType::CPU, float>(1, 3, 10, 10, 3, 1, 1);
+}
+
+TEST_F(DepthwiseConv2dOpTest, ComplexCPU3x3s2) {
+  ComplexValidTest<DeviceType::CPU, float>(1, 3, 10, 10, 3, 1, 2);
 }
 
 TEST_F(DepthwiseConv2dOpTest, ComplexOpenCL) {
-  ComplexValidTest<DeviceType::GPU, float>();
+  ComplexValidTest<DeviceType::GPU, float>(1, 3, 10, 10, 5, 1, 2);
 }
 
 TEST_F(DepthwiseConv2dOpTest, ComplexOpenCLHalf) {
-  ComplexValidTest<DeviceType::GPU, half>();
+  ComplexValidTest<DeviceType::GPU, half>(1, 3, 10, 10, 5, 1, 2);
 }
 
 namespace {
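The rewritten test derives its expected output from the same SAME-padding arithmetic the operator uses, instead of comparing against a hard-coded 5x5x3 answer; that is what lets the two new 3x3 cases (stride 1 and stride 2), which exercise the fixed kernels, share one helper. A quick stand-alone check of that arithmetic for the original ComplexCPU configuration (not part of the test itself):

// Sketch: output size and top padding for height = 10, kernel = 5, stride = 2
// under SAME padding, as computed in the new ComplexValidTest.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t height = 10, kernel = 5, stride = 2;
  const int64_t out_height = (height - 1) / stride + 1;                        // 5
  const int64_t pad_top = ((out_height - 1) * stride + kernel - height) >> 1;  // 1
  assert(out_height == 5);
  assert(pad_top == 1);
  return 0;
}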