慢慢CG / Mace (forked from Xiaomi / Mace, in sync with the upstream project)
Commit 303311af
Authored Sep 06, 2017 by liuqi
Change index type to TIndex and Add member variable to batch_norm kernel functor.
Parent: 9aabdccc
Showing 6 changed files with 64 additions and 72 deletions (+64 / -72).
mace/core/operator.h                    +2   -2
mace/core/tensor.h                      +4   -11
mace/kernels/batch_norm.h               +29  -32
mace/kernels/neon/batch_norm_neon.cc    +20  -18
mace/ops/batch_norm.cc                  +0   -1
mace/ops/batch_norm.h                   +9   -8
mace/core/operator.h

@@ -44,8 +44,8 @@ class OperatorBase {
         *operator_def_, name, default_value);
   }

-  inline const Tensor *Input(int idx) {
-    MACE_CHECK(static_cast<size_t>(idx) < inputs_.size());
+  inline const Tensor *Input(TIndex idx) {
+    MACE_CHECK(idx < inputs_.size());
     return inputs_[idx];
   }
mace/core/tensor.h

@@ -68,15 +68,8 @@ class Tensor {
   inline TIndex dim_size() const { return shape_.size(); }

-  inline int dim32(int index) const {
-    MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit");
-    MACE_CHECK(index >= 0, "Cannot have negative dimension index");
-    MACE_CHECK(shape_[index], std::numeric_limits<int>::max());
-    return static_cast<int>(shape_[index]);
-  }
-
-  inline TIndex dim(int index) const {
-    MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit");
+  inline TIndex dim(TIndex index) const {
+    MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
     MACE_CHECK(index >= 0, "Cannot have negative dimension index");
     return shape_[index];
   }

@@ -133,8 +126,8 @@ class Tensor {
   }

   template <typename T>
-  inline void Copy(const T *src, size_t size) {
-    MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size.");
+  inline void Copy(const T *src, TIndex size) {
+    MACE_CHECK(size == size_, "copy src and dst with different size.");
     CopyBytes(static_cast<const void *>(src), sizeof(T) * size);
   }
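The dropped dim32() accessor existed to clamp a dimension into a 32-bit int; with dim() now taking and returning TIndex, that narrowing path disappears. A minimal sketch of the distinction, assuming TIndex is a signed 64-bit typedef (an assumption for illustration only; the real definition lives elsewhere in mace/core and is not shown in this diff):

    #include <cstdint>
    #include <limits>

    typedef int64_t TIndex;  // assumed width, for this sketch only

    int main() {
      TIndex big_dim = TIndex(1) << 33;  // a dimension larger than INT_MAX
      // Old-style dim32() had to verify this before casting down to int:
      bool fits_in_int = big_dim <= std::numeric_limits<int>::max();  // false here
      // New-style dim() simply returns the TIndex value, so no check or cast is needed.
      return fits_in_int ? 0 : 1;
    }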
mace/kernels/batch_norm.h

@@ -11,33 +11,29 @@
 namespace mace {
 namespace kernels {

-template <DeviceType D>
-struct BatchNormFunctor {
-  void operator()(const float *input,
-                  const float *scale,
-                  const float *offset,
-                  const float *mean,
-                  const float *var,
-                  const int n,
-                  const int channel,
-                  const int sample_size,
-                  const float variance_epsilon,
-                  float *output);
-};
-
-template <>
-struct BatchNormFunctor<DeviceType::CPU> {
-  void operator()(const float *input,
-                  const float *scale,
-                  const float *offset,
-                  const float *mean,
-                  const float *var,
-                  const int n,
-                  const int channel,
-                  const int sample_size,
-                  const float variance_epsilon,
-                  float *output) {
+template <DeviceType D, typename T>
+struct BatchNormFunctorBase {
+  BatchNormFunctorBase(const float variance_epsilon)
+      : variance_epsilon_(variance_epsilon) {}
+
+  float variance_epsilon_;
+};
+
+template <DeviceType D, typename T>
+struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
+  BatchNormFunctor(const float variance_epsilon)
+      : BatchNormFunctorBase<D, T>(variance_epsilon) {}
+
+  void operator()(const T *input,
+                  const T *scale,
+                  const T *offset,
+                  const T *mean,
+                  const T *var,
+                  const TIndex n,
+                  const TIndex channel,
+                  const TIndex sample_size,
+                  T *output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is
     // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +

@@ -45,21 +41,22 @@ struct BatchNormFunctor<DeviceType::CPU> {
     // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
     // new_offset = \offset - mean * common_val;
     // Y = new_scale * X + new_offset;
-    float new_scale, new_offset;
-    for (int c = 0; c < channel; ++c) {
-      new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon);
+    T new_scale, new_offset;
+    for (TIndex c = 0; c < channel; ++c) {
+      new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
       new_offset = offset[c] - mean[c] * new_scale;
-      for (int i = 0; i < n; ++i) {
-        int pos = i * channel * sample_size + c * sample_size;
-        const float *input_sample_ptr = input + pos;
-        float *output_sample_ptr = output + pos;
-        for (int j = 0; j < sample_size; ++j) {
+      for (TIndex i = 0; i < n; ++i) {
+        TIndex pos = i * channel * sample_size + c * sample_size;
+        const T *input_sample_ptr = input + pos;
+        T *output_sample_ptr = output + pos;
+        for (TIndex j = 0; j < sample_size; ++j) {
           output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
         }
       }
     }
   }
 };

 }  // namepsace kernels
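With this restructuring, variance_epsilon is supplied once when the functor is constructed and is no longer passed on every call. A minimal usage sketch under the new interface (the shapes are invented, and it assumes TIndex and DeviceType are reachable through the included header, as the ops code further below suggests):

    #include "mace/kernels/batch_norm.h"

    // Illustrative only: tensor shapes are made up; the call mirrors the new signature.
    void BatchNormExample(const float *input, const float *scale,
                          const float *offset, const float *mean,
                          const float *var, float *output) {
      const mace::TIndex n = 1;                    // batch
      const mace::TIndex channel = 3;              // channels
      const mace::TIndex sample_size = 224 * 224;  // height * width

      // Epsilon is bound at construction (previously a per-call argument).
      mace::kernels::BatchNormFunctor<mace::DeviceType::CPU, float> bn(1e-4f);
      bn(input, scale, offset, mean, var, n, channel, sample_size, output);
    }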
mace/kernels/neon/batch_norm_neon.cc

@@ -2,25 +2,27 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //

 #if __ARM_NEON
 #include <arm_neon.h>
 #include "mace/kernels/batch_norm.h"

 namespace mace {
 namespace kernels {

-template <>
-struct BatchNormFunctor<DeviceType::NEON> {
-  void operator()(const float *input,
-                  const float *scale,
-                  const float *offset,
-                  const float *mean,
-                  const float *var,
+template <typename T>
+struct BatchNormFunctor<DeviceType::NEON> : public BatchNormFunctorBase<DeviceType::NEON, T> {
+  BatchNormFunctor(const float variance_epsilon)
+      : BatchNormFunctorBase<DeviceType::NEON, T>(variance_epsilon) {}
+
+  void operator()(const T *input,
+                  const T *scale,
+                  const T *offset,
+                  const T *mean,
+                  const T *var,
                   const int n,
                   const int channel,
                   const int sample_size,
-                  const float variance_epsilon,
-                  float *output) {
+                  T *output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is

@@ -29,21 +31,21 @@ struct BatchNormFunctor<DeviceType::NEON> {
     // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
     // new_offset = \offset - mean * common_val;
     // Y = new_scale * X + new_offset;
-    float new_scale, new_offset;
+    T new_scale, new_offset;
     int count = sample_size >> 2;
     int remain_count = sample_size - count;
-    for (int c = 0; c < channel; ++c) {
-      new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon);
+    for (TIndex c = 0; c < channel; ++c) {
+      new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon_);
       new_offset = offset[c] - mean[c] * new_scale;
       float32x4_t new_scale_f = vdupq_n_f32(new_scale);
       float32x4_t new_offset_f = vdupq_n_f32(new_offset);
-      for (int i = 0; i < n; ++i) {
-        int pos = i * channel * sample_size + c * sample_size;
+      for (TIndex i = 0; i < n; ++i) {
+        TIndex pos = i * channel * sample_size + c * sample_size;
         const float *input_sample_ptr = input + pos;
         float *output_sample_ptr = output + pos;
-        for (int j = 0; j < count; ++j) {
+        for (TIndex j = 0; j < count; ++j) {
           float32x4_t input_f = vld1q_f32(input_sample_ptr);
           float32x4_t output_f = new_offset_f;
           output_f = vfmaq_f32(output_f, input_f, new_scale_f);

@@ -51,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON> {
         input_sample_ptr += 4;
         output_sample_ptr += 4;
       }
-      for (int j = 0; j < remain_count; ++j) {
+      for (TIndex j = 0; j < remain_count; ++j) {
         *output_sample_ptr = new_scale * *input_sample_ptr + new_offset;
         ++output_sample_ptr;
         ++input_sample_ptr;

@@ -63,4 +65,4 @@ struct BatchNormFunctor<DeviceType::NEON> {
 }  // namespace kernels
 }  // namespace mace

 #endif  // __ARM_NEON
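For readers unfamiliar with the intrinsics in this kernel: vld1q_f32 loads four consecutive floats into a 128-bit register, vdupq_n_f32 broadcasts one scalar across all four lanes, and vfmaq_f32(a, b, c) computes a + b * c per lane. A scalar sketch of what one vectorized step does (purely illustrative, not part of the commit; parameter names mirror the kernel above):

    // Scalar equivalent of one 4-wide NEON step (vfmaq_f32 over vdupq_n_f32 broadcasts).
    static void BatchNormFourLanes(const float *input_sample_ptr,
                                   float *output_sample_ptr,
                                   float new_scale, float new_offset) {
      for (int lane = 0; lane < 4; ++lane) {
        // Per lane: output = new_offset + input * new_scale,
        // i.e. vfmaq_f32(new_offset_f, input_f, new_scale_f).
        output_sample_ptr[lane] = new_offset + input_sample_ptr[lane] * new_scale;
      }
    }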
mace/ops/batch_norm.cc

@@ -3,7 +3,6 @@
 //

 #include "mace/ops/batch_norm.h"
-#include "mace/proto/mace.pb.h"

 namespace mace {
mace/ops/batch_norm.h

@@ -14,7 +14,8 @@ template<DeviceType D, class T>
 class BatchNormOp : public Operator<D, T> {
  public:
   BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
-      : Operator<D, T>(operator_def, ws) {}
+      : Operator<D, T>(operator_def, ws),
+        functor_(OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)) {}

   bool Run() override {
     const Tensor *input = this->Input(0);

@@ -23,8 +24,6 @@ class BatchNormOp : public Operator<D, T> {
     const Tensor *mean = this->Input(3);
     const Tensor *var = this->Input(4);

-    const float variance_epsilon =
-        this->template GetSingleArgument<float>("variance_epsilon", 1e-4);
     MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ",
                input->dim_size());
     MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ",
                scale->dim_size());
     MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ",
                offset->dim_size());

@@ -34,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
     Tensor *output = this->Output(0);
     output->ResizeLike(input);

-    const int n = input->dim32(0);
-    const int channel = input->dim32(1);
-    const int sample_size = input->dim32(2) * input->dim32(3);
+    const TIndex n = input->dim(0);
+    const TIndex channel = input->dim(1);
+    const TIndex sample_size = input->dim(2) * input->dim(3);

     const float *input_ptr = input->data<float>();
     const float *scale_ptr = scale->data<float>();

@@ -45,11 +44,13 @@ class BatchNormOp : public Operator<D, T> {
     const float *var_ptr = var->data<float>();
     float *output_ptr = output->mutable_data<float>();

-    kernels::BatchNormFunctor<D>()(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
-                                   n, channel, sample_size, variance_epsilon, output_ptr);
+    functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
+             n, channel, sample_size, output_ptr);

     return true;
   }

+ private:
+  kernels::BatchNormFunctor<D, T> functor_;
 };
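Taken together with the kernel changes above, the op now reads the "variance_epsilon" argument exactly once, in its constructor, and stores it inside functor_ via BatchNormFunctorBase; Run() no longer re-fetches the argument on every invocation and simply forwards the tensor pointers and TIndex-typed dimensions to the stored functor.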