Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6ac95c81
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6ac95c81
编写于
10月 25, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'batch_norm_opencl' into 'master'
Add opencl batch norm kernel and fix bugs. See merge request !76
上级
129608cc
e2ae6261
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
332 addition
and
86 deletion
+332
-86
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+1
-3
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+7
-4
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+48
-25
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+26
-18
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+46
-0
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+31
-0
mace/ops/BUILD
mace/ops/BUILD
+1
-2
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+2
-0
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+1
-14
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+10
-7
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+152
-13
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+7
-0
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
6ac95c81
...
...
@@ -8,11 +8,9 @@
#include <mutex>
#include <dirent.h>
#include <errno.h>
#include "mace/core/logging.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
namespace
mace
{
namespace
{
...
...
@@ -66,7 +64,7 @@ bool BuildProgram(OpenCLRuntime *runtime,
};
*
program
=
cl
::
Program
(
runtime
->
context
(),
sources
);
std
::
string
build_options
=
"-Werror -cl-mad-enable -I"
+
path
;
std
::
string
build_options
=
"-Werror -cl-mad-enable -
cl-fast-relaxed-math -
I"
+
path
;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
if
(
program
->
build
({
runtime
->
device
()},
build_options
.
c_str
())
!=
CL_SUCCESS
)
{
if
(
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_STATUS
>
(
runtime
->
device
())
==
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
6ac95c81
...
...
@@ -20,15 +20,18 @@ namespace mace {
class
OpenCLRuntime
{
public:
static
OpenCLRuntime
*
Get
();
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
CommandQueue
command_queue
);
~
OpenCLRuntime
();
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
CommandQueue
&
command_queue
();
cl
::
Program
&
program
();
private:
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
CommandQueue
command_queue
);
~
OpenCLRuntime
();
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
private:
cl
::
Context
context_
;
...
...
mace/kernels/batch_norm.h
浏览文件 @
6ac95c81
...
...
@@ -13,16 +13,13 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
struct
BatchNormFunctor
{
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...
...
@@ -31,16 +28,35 @@ struct BatchNormFunctor {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
T
new_scale
,
new_offset
;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
scale_mapper
(
scale
);
Tensor
::
MappingGuard
offset_mapper
(
offset
);
Tensor
::
MappingGuard
mean_mapper
(
mean
);
Tensor
::
MappingGuard
var_mapper
(
var
);
Tensor
::
MappingGuard
epsilon_mapper
(
epsilon
);
Tensor
::
MappingGuard
output_mapper
(
output
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
const
T
*
epsilon_ptr
=
epsilon
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
variance_epsilon
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
T
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
T
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
T
*
input_sample_ptr
=
input
+
pos
;
T
*
output_sample_ptr
=
output
+
pos
;
const
T
*
input_sample_ptr
=
input
_ptr
+
pos
;
T
*
output_sample_ptr
=
output
_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
}
...
...
@@ -52,16 +68,23 @@ struct BatchNormFunctor {
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
template
<
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
}
// namepsace kernels
}
// namespace mace
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
6ac95c81
...
...
@@ -10,38 +10,46 @@ namespace kernels {
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\
variance_
epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\
variance_
epsilon}
// Y = \frac{ \scale } { \sqrt{var+\epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\epsilon}
// }
// new_scale = \frac{ \scale } { \sqrt{var+\
variance_
epsilon} }
// new_scale = \frac{ \scale } { \sqrt{var+\epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
float
new_scale
,
new_offset
;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
float
*
input_ptr
=
input
->
data
<
float
>
();
const
float
*
scale_ptr
=
scale
->
data
<
float
>
();
const
float
*
offset_ptr
=
offset
->
data
<
float
>
();
const
float
*
mean_ptr
=
mean
->
data
<
float
>
();
const
float
*
var_ptr
=
var
->
data
<
float
>
();
const
float
*
epsilon_ptr
=
epsilon
->
data
<
float
>
();
float
*
output_ptr
=
output
->
mutable_data
<
float
>
();
index_t
count
=
sample_size
>>
2
;
index_t
remain_count
=
sample_size
-
(
count
<<
2
);
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
variance_epsilon
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
float
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
float
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
const
float
*
input_sample_ptr
=
input
_ptr
+
pos
;
float
*
output_sample_ptr
=
output
_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
count
;
++
j
)
{
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
...
...
mace/kernels/opencl/batch_norm_opencl.cc
0 → 100644
浏览文件 @
6ac95c81
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/batch_norm.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
namespace
kernels
{
template
<
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
auto
_kernel
=
cl
::
Kernel
(
program
,
"batch_norm"
);
_kernel
.
setArg
(
0
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
_kernel
.
setArg
(
1
,
*
(
static_cast
<
cl
::
Buffer
*>
(
scale
->
buffer
())));
_kernel
.
setArg
(
2
,
*
(
static_cast
<
cl
::
Buffer
*>
(
offset
->
buffer
())));
_kernel
.
setArg
(
3
,
*
(
static_cast
<
cl
::
Buffer
*>
(
mean
->
buffer
())));
_kernel
.
setArg
(
4
,
*
(
static_cast
<
cl
::
Buffer
*>
(
var
->
buffer
())));
_kernel
.
setArg
(
5
,
*
(
static_cast
<
cl
::
Buffer
*>
(
epsilon
->
buffer
())));
_kernel
.
setArg
(
6
,
static_cast
<
int
>
(
sample_size
));
_kernel
.
setArg
(
7
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
_kernel
.
setArg
(
8
,
32u
,
nullptr
);
_kernel
.
setArg
(
9
,
32u
,
nullptr
);
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
n
,
channel
,
sample_size
),
cl
::
NDRange
(
1
,
1
,
128
));
MACE_CHECK
(
error
==
CL_SUCCESS
);
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/opencl/cl/batch_norm.cl
0 → 100644
浏览文件 @
6ac95c81
void
kernel
batch_norm
(
global
const
float
*input,
global
const
float
*scale,
global
const
float
*offset,
global
const
float
*mean,
global
const
float
*var,
global
const
float
*epsilon,
private
const
int
pixels,
global
float
*output,
__local
float
*new_scale,
__local
float
*new_offset
)
{
const
int
batch
=
get_global_id
(
0
)
;
const
int
channel
=
get_global_id
(
1
)
;
const
int
channels
=
get_global_size
(
1
)
;
const
int
pixel_offset
=
get_global_id
(
2
)
;
const
unsigned
int
local_channel
=
get_local_id
(
1
)
;
const
int
local_pixel_idx
=
get_local_id
(
2
)
;
if
(
local_pixel_idx
==
0
)
{
new_scale[local_channel]
=
scale[channel]
*
rsqrt
(
var[channel]
+
*epsilon
)
;
new_offset[local_channel]
=
offset[channel]
-
mean[channel]
*
new_scale[local_channel]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
const
int
sample_offset
=
(
batch
*
channels
+
channel
)
*
pixels
+
pixel_offset
;
const
float
*input_ptr
=
input
+
sample_offset
;
float
*output_ptr
=
output
+
sample_offset
;
*output_ptr
=
new_scale[local_channel]
*
*input_ptr
+
new_offset[local_channel]
;
}
mace/ops/BUILD
浏览文件 @
6ac95c81
...
...
@@ -17,6 +17,7 @@ cc_library(
],
deps
=
[
"//mace/core"
,
"//mace/core:opencl_runtime"
,
"@gtest//:gtest"
,
],
)
...
...
@@ -39,7 +40,6 @@ cc_library(
"-fopenmp"
,
],
deps
=
[
"//mace/core"
,
"//mace/kernels"
,
"//mace/proto:cc_proto"
,
],
...
...
@@ -72,7 +72,6 @@ cc_test(
deps
=
[
":ops"
,
":test"
,
"//mace/core"
,
"//mace/core:test_benchmark_main"
,
],
)
mace/ops/batch_norm.cc
浏览文件 @
6ac95c81
...
...
@@ -12,4 +12,6 @@ REGISTER_CPU_OPERATOR(BatchNorm, BatchNormOp<DeviceType::CPU, float>);
REGISTER_NEON_OPERATOR
(
BatchNorm
,
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
REGISTER_OPENCL_OPERATOR
(
BatchNorm
,
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
}
// namespace mace
\ No newline at end of file
mace/ops/batch_norm.h
浏览文件 @
6ac95c81
...
...
@@ -40,20 +40,7 @@ class BatchNormOp : public Operator<D, T> {
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
const
T
*
epsilon_ptr
=
epsilon
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
*
epsilon_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
functor_
(
input
,
scale
,
offset
,
mean
,
var
,
epsilon
,
output
);
return
true
;
}
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
6ac95c81
...
...
@@ -24,21 +24,23 @@ static void BatchNorm(
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
T
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
}
...
...
@@ -54,7 +56,8 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
BM_BATCH_NORM
(
1
,
3
,
128
,
128
,
float
);
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
6ac95c81
...
...
@@ -9,9 +9,10 @@ namespace mace {
class
BatchNormOpTest
:
public
OpsTestBase
{};
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
template
<
DeviceType
D
>
void
Simple
()
{
// Construct graph
auto
&
net
=
test_net
()
;
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
...
...
@@ -23,26 +24,79 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run
net
.
RunOp
();
net
.
RunOp
(
D
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleNEON
)
{
Simple
<
DeviceType
::
NEON
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleNeon
)
{
TEST_F
(
BatchNormOpTest
,
SimpleRandomNeon
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run NEON
net
.
RunOp
(
DeviceType
::
NEON
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomNeon
)
{
srand
(
time
(
NULL
));
// generate random input
...
...
@@ -74,11 +128,96 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
net
.
RunOp
();
// Check
Tensor
*
expected
=
net
.
GetOutput
(
"Output"
);
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run NEON
net
.
RunOp
(
DeviceType
::
NEON
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run NEON
net
.
RunOp
(
DeviceType
::
OPENCL
);
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run NEON
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
}
mace/ops/ops_test_util.h
浏览文件 @
6ac95c81
...
...
@@ -11,6 +11,7 @@
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
...
...
@@ -152,6 +153,12 @@ class OpsTestNet {
return
ws_
.
GetTensor
(
output_name
);
}
void
Sync
()
{
if
(
net_
)
{
OpenCLRuntime
::
Get
()
->
command_queue
().
finish
();
}
}
public:
Workspace
ws_
;
OperatorDef
op_def_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录