Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
7b6c3241
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
7b6c3241
编写于
9月 19, 2017
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
global avg pooling
上级
5b21653b
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
284 addition
and
0 deletion
+284
-0
mace/kernels/global_avg_pooling.h
mace/kernels/global_avg_pooling.h
+48
-0
mace/kernels/neon/global_avg_pooling_neon.cc
mace/kernels/neon/global_avg_pooling_neon.cc
+57
-0
mace/ops/global_avg_pooling.cc
mace/ops/global_avg_pooling.cc
+17
-0
mace/ops/global_avg_pooling.h
mace/ops/global_avg_pooling.h
+43
-0
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+58
-0
mace/ops/global_avg_pooling_test.cc
mace/ops/global_avg_pooling_test.cc
+61
-0
未找到文件。
mace/kernels/global_avg_pooling.h
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_GLOBAL_AVG_POOLING_H_
#define MACE_KERNELS_GLOBAL_AVG_POOLING_H_
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
// Global average pooling: reduces every (batch, channel) plane of the input
// to the mean of its elements.
//
// Template parameters:
//   D - device the functor is instantiated for (unused by this generic body).
//   T - element type of the input and output buffers.
template <DeviceType D, typename T>
class GlobalAvgPoolingFunctor {
 public:
  GlobalAvgPoolingFunctor() {}

  // Computes one average per plane.
  //
  //   input       - contiguous buffer read as batch * channels consecutive
  //                 planes of height * width elements each.
  //   input_shape - four extents, read as {batch, channels, height, width}.
  //   output      - receives batch * channels values, one per plane, in the
  //                 same order as the planes appear in `input`.
  //
  // NOTE(review): a zero-sized plane (height * width == 0) divides by zero;
  // callers are presumably expected to pass non-empty planes - confirm.
  void operator()(const T *input, const index_t *input_shape, T *output) {
    const index_t batch = input_shape[0];
    const index_t channels = input_shape[1];
    const index_t height = input_shape[2];
    const index_t width = input_shape[3];

    const index_t image_size = height * width;
    const index_t total_channels = batch * channels;

    // Loop counters use index_t, the same type as their bounds, so the
    // comparisons are not mixed-type and the counters cannot overflow before
    // reaching the bound when index_t is wider than int.
    index_t input_offset = 0;
    for (index_t c = 0; c < total_channels; ++c) {
      T sum = 0;
      for (index_t i = 0; i < image_size; ++i) {
        sum += input[input_offset + i];
      }
      output[c] = sum / image_size;
      input_offset += image_size;
    }
  }
};
// Explicit specialization of operator() for the NEON device with float data.
// Only the declaration appears in this header; the body is provided in
// mace/kernels/neon/global_avg_pooling_neon.cc.
template <>
void GlobalAvgPoolingFunctor<DeviceType::NEON, float>::operator()(
    const float *input,
    const index_t *input_shape,
    float *output);
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_GLOBAL_AVG_POOLING_H_
\ No newline at end of file
mace/kernels/neon/global_avg_pooling_neon.cc
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/global_avg_pooling.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
// NEON implementation of global average pooling for float data.
//
//   input       - contiguous buffer read as batch * channels consecutive
//                 planes of height * width floats each.
//   input_shape - four extents, read as {batch, channels, height, width}.
//   output      - receives batch * channels averages, one per plane.
//
// Each plane is summed four floats at a time with NEON vector adds; the
// elements left over when the plane size is not a multiple of four are added
// with scalar code. Planes are distributed across threads by OpenMP.
template <>
void GlobalAvgPoolingFunctor<DeviceType::NEON, float>::operator()(
    const float *input, const index_t *input_shape, float *output) {
  const index_t batch = input_shape[0];
  const index_t channels = input_shape[1];
  const index_t height = input_shape[2];
  const index_t width = input_shape[3];

  const index_t image_size = height * width;
  const index_t total_channels = batch * channels;

  // Both counts depend only on the plane size, so they are computed once
  // instead of once per plane. They are kept as index_t to avoid narrowing
  // the plane size to int.
  const index_t num_vectors = image_size >> 2;
  const index_t remain = image_size - (num_vectors << 2);

#pragma omp parallel for
  for (index_t c = 0; c < total_channels; ++c) {
    const float *inptr = input + c * image_size;
    float sum = 0.0f;

    if (num_vectors > 0) {
      // Seed the accumulator with the first vector, then add the rest.
      float32x4_t sum_vector = vld1q_f32(inptr);
      inptr += 4;
      for (index_t n = 1; n < num_vectors; ++n) {
        sum_vector = vaddq_f32(sum_vector, vld1q_f32(inptr));
        inptr += 4;
      }

      // Horizontal add of the four lanes via a store to memory.
      float sum_out[4];
      vst1q_f32(sum_out, sum_vector);
      sum = sum_out[0] + sum_out[1] + sum_out[2] + sum_out[3];
    }

    // Scalar tail: the last (image_size % 4) elements of the plane.
    for (index_t i = 0; i < remain; ++i) {
      sum += inptr[i];
    }

    output[c] = sum / image_size;
  }
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/ops/global_avg_pooling.cc
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/global_avg_pooling.h"
namespace
mace
{
// Register the float GlobalAvgPooling operator for the CPU device.
REGISTER_CPU_OPERATOR(GlobalAvgPooling,
                      GlobalAvgPoolingOp<DeviceType::CPU, float>);

// The NEON variant is only registered when the compiler targets ARM NEON.
#if __ARM_NEON
REGISTER_NEON_OPERATOR(GlobalAvgPooling,
                       GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif  // __ARM_NEON
}
// namespace mace
mace/ops/global_avg_pooling.h
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_GLOBAL_AVG_POOLING_H_
#define MACE_OPS_GLOBAL_AVG_POOLING_H_
#include "mace/core/operator.h"
#include "mace/kernels/global_avg_pooling.h"
namespace
mace
{
// Operator wrapper around kernels::GlobalAvgPoolingFunctor<D, T>.
//
// Takes one input tensor and produces one output tensor whose shape is
// {input.shape[0], input.shape[1], 1, 1}.
template <DeviceType D, class T>
class GlobalAvgPoolingOp : public Operator<D, T> {
 public:
  GlobalAvgPoolingOp(const OperatorDef &operator_def, Workspace *ws)
      : Operator<D, T>(operator_def, ws) {}

  // Resizes the output and runs the pooling functor over the input buffer.
  // Always returns true.
  //
  // NOTE(review): the first four entries of the input shape are read without
  // checking the tensor's rank; callers are presumably expected to supply a
  // 4-D tensor - confirm.
  bool Run() override {
    const Tensor *input = this->Input(INPUT);
    Tensor *output = this->Output(OUTPUT);

    // Keep the first two extents of the input, collapse the last two to 1.
    std::vector<index_t> output_shape(4);
    output_shape[0] = input->shape()[0];
    output_shape[1] = input->shape()[1];
    output_shape[2] = output_shape[3] = 1;
    output->Resize(output_shape);

    // Buffers are accessed with the operator's element type T so they match
    // the functor's `const T *` / `T *` parameters for every instantiation,
    // not only T = float.
    auto pooling_func = kernels::GlobalAvgPoolingFunctor<D, T>();
    pooling_func(input->data<T>(),
                 input->shape().data(),
                 output->mutable_data<T>());
    return true;
  }

 protected:
  OP_INPUT_TAGS(INPUT);
  OP_OUTPUT_TAGS(OUTPUT);
};
}
// namespace mace
#endif // MACE_OPS_GLOBAL_AVG_POOLING_H_
mace/ops/global_avg_pooling_benchmark.cc
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/global_avg_pooling.h"
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
using
namespace
mace
::
kernels
;
// Benchmark driver: builds a single GlobalAvgPooling op fed with a random
// float input of shape {batch, channels, height, width}, runs it a few times
// untimed, then runs it `iters` times on device D with timing enabled.
template <DeviceType D>
static void GlobalAvgPooling(
    int iters, int batch, int channels, int height, int width) {
  // Setup and warm-up below are excluded from the measurement.
  mace::testing::StopTiming();

  OpsTestNet net;
  OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
      .Input("Input")
      .Output("Output")
      .Finalize(net.operator_def());

  // Random input tensor for the op to consume.
  net.AddRandomInput<float>("Input", {batch, channels, height, width});

  // Untimed warm-up runs.
  const int kWarmupRuns = 5;
  for (int run = 0; run < kWarmupRuns; ++run) {
    net.RunOp(D);
  }

  // Timed section.
  mace::testing::StartTiming();
  for (; iters != 0; --iters) {
    net.RunOp(D);
  }
}
// Defines a benchmark function named
// BM_GLOBAL_AVG_POOLING_<N>_<C>_<H>_<W>_<DEVICE> and hands it to BENCHMARK.
// The function reports iters * N * C * H * W processed items and that count
// times sizeof(float) processed bytes, then calls
// GlobalAvgPooling<DEVICE>(iters, N, C, H, W).
// DEVICE is both pasted into the function name and used as the template
// argument, so it must be a single identifier.
#define BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, DEVICE) \
static void \
BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE)
// Instantiates the benchmark above for one input shape on both the CPU and
// the NEON device.
#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \
BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, CPU); \
BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, NEON);
// Benchmarked input shapes, given as (N, C, H, W).
BM_GLOBAL_AVG_POOLING(1, 3, 7, 7);
BM_GLOBAL_AVG_POOLING(1, 3, 64, 64);
BM_GLOBAL_AVG_POOLING(1, 3, 256, 256);
\ No newline at end of file
mace/ops/global_avg_pooling_test.cc
0 → 100644
浏览文件 @
7b6c3241
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
// Test fixture for the GlobalAvgPooling operator; adds nothing on top of
// OpsTestBase.
class GlobalAvgPoolingOpTest : public OpsTestBase {
};
// Runs GlobalAvgPooling with RunOp()'s default device on a {1, 3, 7, 7}
// input whose three planes are filled with 1, 2 and 3, and expects those
// same three values as the per-plane averages.
TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
  // Build the single-op graph.
  auto &net = test_net();
  OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
      .Input("Input")
      .Output("Output")
      .Finalize(net.operator_def());

  // Input data: 3 planes of 49 elements; plane k holds the constant k + 1.
  std::vector<float> input;
  input.reserve(147);
  for (int plane = 0; plane < 3; ++plane) {
    input.insert(input.end(), 49, static_cast<float>(plane + 1));
  }
  net.AddInputFromArray<float>("Input", {1, 3, 7, 7}, input);

  // Execute the op.
  net.RunOp();

  // The mean of a constant plane is that constant.
  auto expected = CreateTensor<float>({1, 3, 1, 1}, {1, 2, 3});
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
// Runs GlobalAvgPooling on DeviceType::NEON with a {1, 3, 7, 7} input whose
// three planes are filled with 1, 2 and 3, and expects those same three
// values as the per-plane averages.
TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) {
  // Build the single-op graph.
  auto &net = test_net();
  OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
      .Input("Input")
      .Output("Output")
      .Finalize(net.operator_def());

  // Input data: 3 planes of 49 elements; plane k holds the constant k + 1.
  std::vector<float> input;
  input.reserve(147);
  for (int plane = 0; plane < 3; ++plane) {
    input.insert(input.end(), 49, static_cast<float>(plane + 1));
  }
  net.AddInputFromArray<float>("Input", {1, 3, 7, 7}, input);

  // Execute the op on the NEON device.
  net.RunOp(DeviceType::NEON);

  // The mean of a constant plane is that constant.
  auto expected = CreateTensor<float>({1, 3, 1, 1}, {1, 2, 3});
  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录