Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
0c54e020
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
0c54e020
编写于
9月 18, 2017
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Implement ReluN
上级
291a5ee6
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
89 additions
and
26 deletions
+89
-26
mace/kernels/neon/relu_neon.cc
mace/kernels/neon/relu_neon.cc
+45
-18
mace/kernels/relu.h
mace/kernels/relu.h
+10
-2
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+4
-4
mace/ops/relu.h
mace/ops/relu.h
+4
-1
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+26
-1
未找到文件。
mace/kernels/neon/relu_neon.cc
浏览文件 @
0c54e020
...
...
@@ -12,26 +12,53 @@ template <>
// NEON specialization of ReluFunctor: computes output[i] = max(input[i], 0),
// optionally clamped above by max_limit_ (ReLU-N / ReLU6-style activation).
// A negative max_limit_ selects plain ReLU with no upper bound.
// Data is processed in chunks of kCostPerGroup elements; within each chunk,
// four floats at a time go through NEON registers and the (count % 4) tail
// falls back to scalar std::max / std::min.
template <>
void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
                                                      float *output,
                                                      index_t size) {
  if (max_limit_ < 0) {
    // Plain ReLU path: no upper clamp.
#pragma omp parallel for num_threads(1)  // no significant perf improve
    for (int64_t i = 0; i < size; i += kCostPerGroup) {
      int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
      int block = count >> 2;             // number of full 4-float vectors
      int remain = count - (block << 2);  // scalar tail (0..3 elements)
      const float *inptr = input + i;
      float *outptr = output + i;
      float32x4_t zero = vdupq_n_f32(0.f);
      for (; block > 0; --block) {
        float32x4_t in = vld1q_f32(inptr);
        float32x4_t out = vmaxq_f32(in, zero);
        vst1q_f32(outptr, out);
        inptr += 4;
        outptr += 4;
      }
      for (; remain > 0; --remain) {
        *outptr = std::max(*inptr, 0.f);
        ++inptr;
        ++outptr;
      }
    }
  } else {
    // ReluN path: clamp the result into [0, max_limit_].
#pragma omp parallel for num_threads(1)  // no significant perf improve
    for (int64_t i = 0; i < size; i += kCostPerGroup) {
      int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
      int block = count >> 2;
      int remain = count - (block << 2);
      const float *inptr = input + i;
      float *outptr = output + i;
      float32x4_t zero = vdupq_n_f32(0.f);
      float32x4_t vmax = vdupq_n_f32(max_limit_);  // broadcast upper bound
      for (; block > 0; --block) {
        float32x4_t in = vld1q_f32(inptr);
        float32x4_t out = vmaxq_f32(in, zero);
        out = vminq_f32(out, vmax);
        vst1q_f32(outptr, out);
        inptr += 4;
        outptr += 4;
      }
      for (; remain > 0; --remain) {
        *outptr = std::min(std::max(*inptr, 0.f), max_limit_);
        ++inptr;
        ++outptr;
      }
    }
  }
};
...
...
mace/kernels/relu.h
浏览文件 @
0c54e020
...
...
@@ -12,9 +12,17 @@ namespace kernels {
// Generic (scalar) ReLU / ReluN functor, used by devices without a
// specialized implementation. A negative max_limit_ means plain ReLU;
// otherwise the output is clamped into [0, max_limit_].
template <DeviceType D, typename T>
struct ReluFunctor {
  // Upper clamp for ReluN; negative disables the clamp (plain ReLU).
  T max_limit_;

  // Applies the activation element-wise: output[i] = max(input[i], 0),
  // additionally bounded by max_limit_ when it is non-negative.
  void operator()(const T *input, T *output, index_t size) {
    if (max_limit_ < 0) {
      for (index_t i = 0; i < size; ++i) {
        output[i] = std::max(input[i], static_cast<T>(0));
      }
    } else {
      for (index_t i = 0; i < size; ++i) {
        output[i] =
            std::min(std::max(input[i], static_cast<T>(0)), max_limit_);
      }
    }
  }
};
...
...
mace/ops/pooling_test.cc
浏览文件 @
0c54e020
...
...
@@ -155,9 +155,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
MAX_k3x3s2x2
)
{
...
...
@@ -183,7 +183,7 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
mace/ops/relu.h
浏览文件 @
0c54e020
...
...
@@ -14,7 +14,10 @@ template <DeviceType D, class T>
class
ReluOp
:
public
Operator
<
D
,
T
>
{
public:
// Constructs the Relu operator and reads the optional "max_limit" argument
// from the operator definition. The default of -1 (negative) makes the
// functor behave as a plain ReLU with no upper clamp; a non-negative value
// turns it into ReluN (output clamped into [0, max_limit]).
ReluOp(const OperatorDef &operator_def, Workspace *ws)
    : Operator<D, T>(operator_def, ws) {
  functor_.max_limit_ =
      OperatorBase::GetSingleArgument<T>("max_limit", static_cast<T>(-1));
}
bool
Run
()
override
{
const
Tensor
*
input_tensor
=
this
->
inputs_
[
0
];
Tensor
*
output_tensor
=
this
->
outputs_
[
0
];
...
...
mace/ops/relu_test.cc
浏览文件 @
0c54e020
...
...
@@ -18,7 +18,7 @@ TEST_F(ReluOpTest, ReluOp) {
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
4
});
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
5
});
// Run
net
.
RunOp
();
...
...
@@ -32,4 +32,29 @@ TEST_F(ReluOpTest, ReluOp) {
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
// Verifies that the NEON ReluN implementation matches the reference (CPU)
// implementation when a "max_limit" argument is set: runs the same graph on
// both devices with random input and compares the outputs element-wise.
TEST_F(ReluOpTest, ReluOpWithMax) {
  // Construct graph
  auto &net = test_net();
  OpDefBuilder("Relu", "ReluTestWithMax")
      .Input("Input")
      .Output("Output")
      .Finalize(net.operator_def());

  // Add input data; max_limit of 0.5 exercises the clamped (ReluN) path.
  net.AddRandomInput<float>("Input", {1, 2, 3, 5});
  net.AddFloatArg("max_limit", 0.5);

  // Run on the default device and keep its output as the reference.
  net.RunOp();

  Tensor expected;
  expected.Copy(*net.GetOutput("Output"));

  // Re-run on NEON and check it agrees with the reference output.
  net.RunOp(DeviceType::NEON);
  ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01);
}
}
// namespace mace
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录