Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
as350144
Mace
提交
e5df2906
Mace
项目概览
as350144
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
2
Star
1
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e5df2906
编写于
9月 18, 2017
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add avg pooling 2x2 3x3
上级
1e51497a
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
498 addition
and
22 deletion
+498
-22
mace/kernels/neon/avg_pooling_neon_2x2.cc
mace/kernels/neon/avg_pooling_neon_2x2.cc
+177
-0
mace/kernels/neon/avg_pooling_neon_3x3.cc
mace/kernels/neon/avg_pooling_neon_3x3.cc
+223
-0
mace/kernels/neon/pooling_neon.cc
mace/kernels/neon/pooling_neon.cc
+59
-15
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+39
-7
未找到文件。
mace/kernels/neon/avg_pooling_neon_2x2.cc
0 → 100644
浏览文件 @
e5df2906
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingAvgNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
index_t
in_width
=
in_shape
[
3
];
index_t
out_height
=
out_shape
[
2
];
index_t
out_width
=
out_shape
[
3
];
int
padding_top
=
paddings
[
0
]
/
2
;
int
padding_bottom
=
paddings
[
0
]
-
padding_top
;
int
padding_left
=
paddings
[
1
]
/
2
;
int
padding_right
=
paddings
[
1
]
-
padding_left
;
int
in_image_size
=
in_height
*
in_width
;
int
out_image_size
=
out_height
*
out_width
;
index_t
input_offset
=
0
;
index_t
output_offset
=
0
;
float
avg_factors
[
4
]
=
{
0.25
,
0.25
,
0.25
,
0.25
};
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
float
*
outptr
=
output
+
output_offset
;
const
float
*
r0
,
*
r1
;
for
(
int
h
=
0
;
h
<
out_height
;
++
h
)
{
int
w
=
0
;
int
num_vectors
=
0
;
if
(
!
((
h
==
0
&&
padding_top
>
0
)
||
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
r0
=
input
+
input_offset
+
(
h
*
2
-
padding_top
)
*
in_width
;
r1
=
r0
+
in_width
;
if
(
padding_left
>
0
)
{
*
outptr
=
(
r0
[
0
]
+
r1
[
0
])
*
0.25
;
++
r0
;
++
r1
;
++
outptr
;
++
w
;
}
if
(
padding_right
>
0
)
{
num_vectors
=
(
out_width
-
w
-
1
)
>>
2
;
}
else
{
num_vectors
=
(
out_width
-
w
)
>>
2
;
}
}
w
+=
num_vectors
<<
2
;
float32x4_t
factors
=
vld1q_f32
(
avg_factors
);
for
(;
num_vectors
>
0
;
--
num_vectors
)
{
float32x4_t
r00
=
vld1q_f32
(
r0
);
float32x4_t
r10
=
vld1q_f32
(
r1
);
float32x4_t
r01
=
vld1q_f32
(
r0
+
4
);
float32x4_t
r11
=
vld1q_f32
(
r1
+
4
);
float32x4_t
sum0
=
vaddq_f32
(
r00
,
r10
);
float32x4_t
sum1
=
vaddq_f32
(
r01
,
r11
);
float32x4_t
sum_result
=
vpaddq_f32
(
sum0
,
sum1
);
float32x4_t
avg_result
=
vmulq_f32
(
sum_result
,
factors
);
vst1q_f32
(
outptr
,
avg_result
);
r0
+=
8
;
r1
+=
8
;
outptr
+=
4
;
}
for
(;
w
<
out_width
;
++
w
)
{
float
sum
=
0.0
;
for
(
int
kh
=
0
;
kh
<
2
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
2
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
sum
+=
input
[
input_offset
+
inh
*
in_width
+
inw
];
}
}
}
*
outptr
=
sum
*
0.25
;
++
outptr
;
}
}
input_offset
+=
in_image_size
;
output_offset
+=
out_image_size
;
}
}
}
// assume the input has already been padded
void
PoolingAvgNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
index_t
in_width
=
in_shape
[
3
];
index_t
out_height
=
out_shape
[
2
];
index_t
out_width
=
out_shape
[
3
];
int
in_image_size
=
in_height
*
in_width
;
int
out_image_size
=
out_height
*
out_width
;
index_t
input_offset
=
0
;
index_t
output_offset
=
0
;
float
avg_factors
[
4
]
=
{
0.25
,
0.25
,
0.25
,
0.25
};
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
const
float
*
img0
=
input
+
input_offset
;
float
*
outptr
=
output
+
output_offset
;
const
float
*
r0
=
img0
;
const
float
*
r1
=
img0
+
in_width
;
for
(
int
h
=
0
;
h
<
out_height
;
++
h
)
{
int
num_vectors
=
out_width
>>
2
;
int
remain
=
out_width
-
(
num_vectors
<<
2
);
float32x4_t
factors
=
vld1q_f32
(
avg_factors
);
for
(;
num_vectors
>
0
;
--
num_vectors
)
{
float32x4_t
r00
=
vld1q_f32
(
r0
);
float32x4_t
r10
=
vld1q_f32
(
r1
);
float32x4_t
r01
=
vld1q_f32
(
r0
+
4
);
float32x4_t
r11
=
vld1q_f32
(
r1
+
4
);
float32x4_t
sum0
=
vaddq_f32
(
r00
,
r10
);
float32x4_t
sum1
=
vaddq_f32
(
r01
,
r11
);
float32x4_t
sum_result
=
vpaddq_f32
(
sum0
,
sum1
);
float32x4_t
avg_result
=
vmulq_f32
(
sum_result
,
factors
);
vst1q_f32
(
outptr
,
avg_result
);
r0
+=
8
;
r1
+=
8
;
outptr
+=
4
;
}
for
(;
remain
>
0
;
--
remain
)
{
*
outptr
=
(
r0
[
0
]
+
r0
[
1
]
+
r1
[
0
]
+
r1
[
1
])
*
0.25
;
r0
+=
2
;
r1
+=
2
;
outptr
++
;
}
r0
+=
in_width
;
r1
+=
in_width
;
}
input_offset
+=
in_image_size
;
output_offset
+=
out_image_size
;
}
}
}
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/avg_pooling_neon_3x3.cc
0 → 100644
浏览文件 @
e5df2906
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingAvgNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
index_t
in_width
=
in_shape
[
3
];
index_t
out_height
=
out_shape
[
2
];
index_t
out_width
=
out_shape
[
3
];
int
padding_top
=
paddings
[
0
]
/
2
;
int
padding_bottom
=
paddings
[
0
]
-
padding_top
;
int
padding_left
=
paddings
[
1
]
/
2
;
int
padding_right
=
paddings
[
1
]
-
padding_left
;
int
in_image_size
=
in_height
*
in_width
;
int
out_image_size
=
out_height
*
out_width
;
index_t
input_offset
=
0
;
index_t
output_offset
=
0
;
float
avg_factors
[
4
]
=
{
1.0
/
9.0
,
1.0
/
9.0
,
1.0
/
9.0
,
1.0
/
9.0
};
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
float
*
outptr
=
output
+
output_offset
;
for
(
int
h
=
0
;
h
<
out_height
;
++
h
)
{
int
w
=
0
;
int
num_vectors
=
0
;
const
float
*
r0
,
*
r1
,
*
r2
;
if
(
!
((
h
==
0
&&
padding_top
>
0
)
||
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
r0
=
input
+
input_offset
+
(
h
*
2
-
padding_top
)
*
in_width
;
r1
=
r0
+
in_width
;
r2
=
r1
+
in_width
;
if
(
padding_left
>
0
)
{
if
(
padding_left
==
1
)
{
float
sum0
=
std
::
max
(
r0
[
0
],
r0
[
1
]);
float
sum1
=
std
::
max
(
r1
[
0
],
r1
[
1
]);
float
max2
=
std
::
max
(
r2
[
0
],
r2
[
1
]);
*
outptr
=
(
r0
[
0
]
+
r0
[
1
]
+
r1
[
0
]
+
r1
[
1
]
+
r2
[
0
]
+
r2
[
1
])
/
9.0
;
++
r0
;
++
r1
;
}
else
{
// padding_left == 2
*
outptr
=
(
r0
[
0
]
+
r1
[
0
]
+
r2
[
0
])
/
9.0
;
}
++
outptr
;
++
w
;
}
if
(
padding_right
>
0
)
{
num_vectors
=
(
out_width
-
w
-
1
)
>>
2
;
}
else
{
num_vectors
=
(
out_width
-
w
)
>>
2
;
}
}
w
+=
num_vectors
<<
2
;
float32x4_t
factors
=
vld1q_f32
(
avg_factors
);
float32x4x2_t
row0
=
vld2q_f32
(
r0
);
float32x4x2_t
row1
=
vld2q_f32
(
r1
);
float32x4x2_t
row2
=
vld2q_f32
(
r2
);
for
(;
num_vectors
>
0
;
--
num_vectors
)
{
float32x4x2_t
row0_next
=
vld2q_f32
(
r0
+
8
);
float32x4x2_t
row1_next
=
vld2q_f32
(
r1
+
8
);
float32x4x2_t
row2_next
=
vld2q_f32
(
r2
+
8
);
float32x4_t
sum0
=
vaddq_f32
(
row0
.
val
[
0
],
row0
.
val
[
1
]);
float32x4_t
sum1
=
vaddq_f32
(
row1
.
val
[
0
],
row1
.
val
[
1
]);
float32x4_t
sum2
=
vaddq_f32
(
row2
.
val
[
0
],
row2
.
val
[
1
]);
float32x4_t
row02
=
vextq_f32
(
row0
.
val
[
0
],
row0_next
.
val
[
0
],
1
);
float32x4_t
row12
=
vextq_f32
(
row1
.
val
[
0
],
row1_next
.
val
[
0
],
1
);
float32x4_t
row22
=
vextq_f32
(
row2
.
val
[
0
],
row2_next
.
val
[
0
],
1
);
sum0
=
vaddq_f32
(
sum0
,
row02
);
sum1
=
vaddq_f32
(
sum1
,
row12
);
sum2
=
vaddq_f32
(
sum2
,
row22
);
float32x4_t
sum_result
=
vaddq_f32
(
vaddq_f32
(
sum0
,
sum1
),
sum2
);
float32x4_t
avg_result
=
vmulq_f32
(
sum_result
,
factors
);
vst1q_f32
(
outptr
,
avg_result
);
row0
=
row0_next
;
row1
=
row1_next
;
row2
=
row2_next
;
r0
+=
8
;
r1
+=
8
;
r2
+=
8
;
outptr
+=
4
;
}
for
(;
w
<
out_width
;
++
w
)
{
float
sum
=
0.0
;
for
(
int
kh
=
0
;
kh
<
3
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
3
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
sum
+=
input
[
input_offset
+
inh
*
in_width
+
inw
];
}
}
}
*
outptr
=
sum
/
9.0
;
++
outptr
;
}
}
input_offset
+=
in_image_size
;
output_offset
+=
out_image_size
;
}
}
}
// assume the input has already been padded
void
PoolingAvgNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
index_t
in_width
=
in_shape
[
3
];
index_t
out_height
=
out_shape
[
2
];
index_t
out_width
=
out_shape
[
3
];
int
in_image_size
=
in_height
*
in_width
;
int
out_image_size
=
out_height
*
out_width
;
index_t
input_offset
=
0
;
index_t
output_offset
=
0
;
float
avg_factors
[
4
]
=
{
1.0
/
9.0
,
1.0
/
9.0
,
1.0
/
9.0
,
1.0
/
9.0
};
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
const
float
*
img0
=
input
+
input_offset
;
float
*
outptr
=
output
+
output_offset
;
const
float
*
r0
=
img0
;
const
float
*
r1
=
r0
+
in_width
;
const
float
*
r2
=
r1
+
in_width
;
for
(
int
h
=
0
;
h
<
out_height
;
h
++
)
{
int
num_vectors
=
out_width
>>
2
;
int
remain
=
out_width
-
(
num_vectors
<<
2
);
float32x4_t
factors
=
vld1q_f32
(
avg_factors
);
float32x4x2_t
row0
=
vld2q_f32
(
r0
);
float32x4x2_t
row1
=
vld2q_f32
(
r1
);
float32x4x2_t
row2
=
vld2q_f32
(
r2
);
for
(;
num_vectors
>
0
;
--
num_vectors
)
{
float32x4x2_t
row0_next
=
vld2q_f32
(
r0
+
8
);
float32x4x2_t
row1_next
=
vld2q_f32
(
r1
+
8
);
float32x4x2_t
row2_next
=
vld2q_f32
(
r2
+
8
);
float32x4_t
sum0
=
vaddq_f32
(
row0
.
val
[
0
],
row0
.
val
[
1
]);
float32x4_t
sum1
=
vaddq_f32
(
row1
.
val
[
0
],
row1
.
val
[
1
]);
float32x4_t
sum2
=
vaddq_f32
(
row2
.
val
[
0
],
row2
.
val
[
1
]);
float32x4_t
row02
=
vextq_f32
(
row0
.
val
[
0
],
row0_next
.
val
[
0
],
1
);
float32x4_t
row12
=
vextq_f32
(
row1
.
val
[
0
],
row1_next
.
val
[
0
],
1
);
float32x4_t
row22
=
vextq_f32
(
row2
.
val
[
0
],
row2_next
.
val
[
0
],
1
);
sum0
=
vaddq_f32
(
sum0
,
row02
);
sum1
=
vaddq_f32
(
sum1
,
row12
);
sum2
=
vaddq_f32
(
sum2
,
row22
);
float32x4_t
sum_result
=
vaddq_f32
(
vaddq_f32
(
sum0
,
sum1
),
sum2
);
float32x4_t
avg_result
=
vmulq_f32
(
sum_result
,
factors
);
vst1q_f32
(
outptr
,
avg_result
);
row0
=
row0_next
;
row1
=
row1_next
;
row2
=
row2_next
;
r0
+=
8
;
r1
+=
8
;
r2
+=
8
;
outptr
+=
4
;
}
for
(;
remain
>
0
;
remain
--
)
{
*
outptr
=
(
r0
[
0
]
+
r0
[
1
]
+
r0
[
2
]
+
r1
[
0
]
+
r1
[
1
]
+
r1
[
2
]
+
r2
[
0
]
+
r2
[
1
]
+
r2
[
2
])
/
9.0
;
r0
+=
2
;
r1
+=
2
;
r2
+=
2
;
outptr
++
;
}
r0
+=
1
+
in_width
;
r1
+=
1
+
in_width
;
r2
+=
1
+
in_width
;
}
input_offset
+=
in_image_size
;
output_offset
+=
out_image_size
;
}
}
}
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/pooling_neon.cc
浏览文件 @
e5df2906
...
...
@@ -15,51 +15,95 @@ extern void PoolingMaxNeonK2x2S2x2(const float *input,
const
index_t
*
out_shape
,
const
int
*
paddings
);
extern
void
PoolingAvgNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
extern
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
extern
void
PoolingAvgNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
#ifdef __COPY_MAKE_PADDING
extern
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingAvgNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingAvgNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
#endif
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
index_t
*
output_shape
)
{
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
input
=
padded_input
.
data
<
float
>
();
input_shape
=
padded_input
.
shape
().
data
();
#endif
if
(
kernels_
[
0
]
==
2
&&
kernels_
[
1
]
==
2
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
strides_
[
1
]
==
2
)
{
// kernel_size: 2x2, strides: 2x2
if
(
pooling_type_
==
MAX
)
{
// MAX_POOL_2x2s2x2
#ifdef __COPY_MAKE_PADDING
PoolingMaxNeonK2x2S2x2Padded
(
input
,
input_shape
,
output
,
output_shape
);
#else
PoolingMaxNeonK2x2S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
else
{
// AVG_POOL_2x2s2x2
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
input
=
padded_input
.
data
<
float
>
();
input_shape
=
padded_input
.
shape
().
data
();
PoolingMaxNeonK2x2S2x2Padded
(
input
,
input_shape
,
output
,
output_shape
);
PoolingAvgNeonK2x2S2x2Padded
(
input
,
input_shape
,
output
,
output_shape
);
#else
PoolingMaxNeonK2x2S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
PoolingAvgNeonK2x2S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
}
else
if
(
kernels_
[
0
]
==
3
&&
kernels_
[
1
]
==
3
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
strides_
[
1
]
==
2
)
{
// kernel_size: 3x3, strides: 2x2
if
(
pooling_type_
==
MAX
)
{
// MAX_POOL_3x3s2x2
#ifdef __COPY_MAKE_PADDING
PoolingMaxNeonK3x3S2x2Padded
(
input
,
input_shape
,
output
,
output_shape
);
#else
PoolingMaxNeonK3x3S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
else
{
// AVG_POOL_3x3s2x2
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
input
=
padded_input
.
data
<
float
>
();
input_shape
=
padded_input
.
shape
().
data
();
PoolingMaxNeonK3x3S2x2V2Padded
(
input
,
input_shape
,
output
,
output_shape
);
PoolingAvgNeonK3x3S2x2Padded
(
input
,
input_shape
,
output
,
output_shape
);
#else
PoolingMaxNeonK3x3S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
PoolingAvgNeonK3x3S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
}
else
{
// not implement yet
PoolingFunctor
<
DeviceType
::
CPU
,
float
>
(
pooling_type_
,
kernels_
,
strides_
,
paddings_
,
dilations_
)(
...
...
mace/ops/pooling_test.cc
浏览文件 @
e5df2906
...
...
@@ -148,14 +148,14 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
}
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
2
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
5
},
{
10
,
12
,
14
,
16
,
17
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -172,18 +172,50 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
net
.
AddIntsArg
(
"kernels"
,
{
3
,
3
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
3
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
4
},
{
20
,
22
,
24
,
26
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
AVG_k2x2s2x2
)
{
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
);
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
"Input"
,
{
1
,
1
,
2
,
8
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
4
},
{
4.5
,
6.5
,
8.5
,
10.5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录