Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
bcec92d0
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
bcec92d0
编写于
10月 25, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add opencl batch norm kernel and fix bugs.
上级
129608cc
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
305 addition
and
77 deletion
+305
-77
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+48
-25
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+26
-18
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+50
-0
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+19
-0
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+2
-0
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+1
-14
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+8
-7
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+151
-13
未找到文件。
mace/kernels/batch_norm.h
浏览文件 @
bcec92d0
...
...
@@ -13,16 +13,13 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
struct
BatchNormFunctor
{
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...
...
@@ -31,16 +28,35 @@ struct BatchNormFunctor {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * new_scale;
// Y = new_scale * X + new_offset;
T
new_scale
,
new_offset
;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
scale_mapper
(
scale
);
Tensor
::
MappingGuard
offset_mapper
(
offset
);
Tensor
::
MappingGuard
mean_mapper
(
mean
);
Tensor
::
MappingGuard
var_mapper
(
var
);
Tensor
::
MappingGuard
epsilon_mapper
(
epsilon
);
Tensor
::
MappingGuard
output_mapper
(
output
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
const
T
*
epsilon_ptr
=
epsilon
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
variance_epsilon
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
T
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
T
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
T
*
input_sample_ptr
=
input
+
pos
;
T
*
output_sample_ptr
=
output
+
pos
;
const
T
*
input_sample_ptr
=
input
_ptr
+
pos
;
T
*
output_sample_ptr
=
output
_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
}
...
...
@@ -52,16 +68,23 @@ struct BatchNormFunctor {
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
template
<
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
bcec92d0
...
...
@@ -10,38 +10,46 @@ namespace kernels {
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
float
variance_epsilon
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\
variance_
epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\
variance_
epsilon}
// Y = \frac{ \scale } { \sqrt{var+\epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\epsilon}
// }
// new_scale = \frac{ \scale } { \sqrt{var+\
variance_
epsilon} }
// new_scale = \frac{ \scale } { \sqrt{var+\epsilon} }
// new_offset = \offset - mean * new_scale;
// Y = new_scale * X + new_offset;
float
new_scale
,
new_offset
;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
float
*
input_ptr
=
input
->
data
<
float
>
();
const
float
*
scale_ptr
=
scale
->
data
<
float
>
();
const
float
*
offset_ptr
=
offset
->
data
<
float
>
();
const
float
*
mean_ptr
=
mean
->
data
<
float
>
();
const
float
*
var_ptr
=
var
->
data
<
float
>
();
const
float
*
epsilon_ptr
=
epsilon
->
data
<
float
>
();
float
*
output_ptr
=
output
->
mutable_data
<
float
>
();
index_t
count
=
sample_size
>>
2
;
index_t
remain_count
=
sample_size
-
(
count
<<
2
);
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
new_scale
=
scale
[
c
]
/
std
::
sqrt
(
var
[
c
]
+
variance_epsilon
);
new_offset
=
offset
[
c
]
-
mean
[
c
]
*
new_scale
;
float
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
float
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
const
float
*
input_sample_ptr
=
input
_ptr
+
pos
;
float
*
output_sample_ptr
=
output
_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
count
;
++
j
)
{
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
...
...
mace/kernels/opencl/batch_norm_opencl.cc
0 → 100644
浏览文件 @
bcec92d0
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/batch_norm.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
namespace
kernels
{
// OpenCL specialization of the batch-norm functor.
//
// Looks up the "batch_norm" kernel in the runtime's program and enqueues it
// on the runtime's command queue as a 1-D range with one work-item per input
// element (dim(0) * dim(1) * dim(2) * dim(3)), then blocks until it finishes.
//
// Every tensor's buffer() is cast to cl::Buffer and handed to the kernel
// as-is; `epsilon` is passed as a buffer too, not as a scalar. The channel
// count (dim(1)) and the per-channel element count (dim(2) * dim(3)) are
// narrowed to int for the kernel arguments.
template <>
void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
    const Tensor *input,
    const Tensor *scale,
    const Tensor *offset,
    const Tensor *mean,
    const Tensor *var,
    const Tensor *epsilon,
    Tensor *output) {
  const index_t n = input->dim(0);
  const index_t channel = input->dim(1);
  const index_t sample_size = input->dim(2) * input->dim(3);

  auto runtime = OpenCLRuntime::Get();
  auto program = runtime->program();
  auto batch_norm_kernel =
      cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer,
                        cl::Buffer, cl::Buffer, int, int, cl::Buffer>(
          program, "batch_norm");

  cl_int error;
  // The local work size is left to the implementation (cl::NullRange).
  // A fixed local size of 128 requires the global size to be a multiple of
  // 128 unless non-uniform work-groups are available, and
  // n * channel * sample_size is not a multiple of 128 in general
  // (e.g. 103 x 113 images), which makes the enqueue fail.
  auto res_event = batch_norm_kernel(
      cl::EnqueueArgs(runtime->command_queue(),
                      cl::NDRange(n * channel * sample_size),
                      cl::NullRange),
      *(static_cast<const cl::Buffer *>(input->buffer())),
      *(static_cast<cl::Buffer *>(scale->buffer())),
      *(static_cast<cl::Buffer *>(offset->buffer())),
      *(static_cast<cl::Buffer *>(mean->buffer())),
      *(static_cast<cl::Buffer *>(var->buffer())),
      *(static_cast<cl::Buffer *>(epsilon->buffer())),
      static_cast<int>(channel),
      static_cast<int>(sample_size),
      *(static_cast<cl::Buffer *>(output->buffer())),
      error);
  // Check the enqueue status first: when the enqueue fails there is no valid
  // event to wait on.
  MACE_CHECK(error == CL_SUCCESS);
  res_event.wait();
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/opencl/cl/batch_norm.cl
0 → 100644
浏览文件 @
bcec92d0
// Inference-time batch normalization, one element per work-item.
//
// Each work-item processes the element at get_global_id(0). Its channel is
// derived as (idx % (channels * pixels)) / pixels, and the result is
//
//   new_scale   = scale[channel] * rsqrt(var[channel] + *epsilon)
//   new_offset  = offset[channel] - mean[channel] * new_scale
//   output[idx] = new_scale * input[idx] + new_offset
//
// scale, offset, mean and var are indexed by channel; epsilon is read as a
// single value through its pointer.
//
// NOTE(review): idx is not bounds-checked, so the global work size must not
// exceed the number of elements in input/output -- confirm at the call site.
void kernel batch_norm(global const float *input,
                       global const float *scale,
                       global const float *offset,
                       global const float *mean,
                       global const float *var,
                       global const float *epsilon,
                       private const int channels,
                       private const int pixels,
                       global float *output) {
  const int idx = get_global_id(0);
  const int channel = (idx % (channels * pixels)) / pixels;

  const float new_scale = scale[channel] * rsqrt(var[channel] + *epsilon);
  const float new_offset = offset[channel] - mean[channel] * new_scale;

  // Index the global buffers directly. An unqualified pointer refers to the
  // private address space in OpenCL C 1.x, so it must not be used to hold an
  // address derived from a global pointer.
  output[idx] = new_scale * input[idx] + new_offset;
}
mace/ops/batch_norm.cc
浏览文件 @
bcec92d0
...
...
@@ -12,4 +12,6 @@ REGISTER_CPU_OPERATOR(BatchNorm, BatchNormOp<DeviceType::CPU, float>);
REGISTER_NEON_OPERATOR
(
BatchNorm
,
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
REGISTER_OPENCL_OPERATOR
(
BatchNorm
,
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
}
// namespace mace
\ No newline at end of file
mace/ops/batch_norm.h
浏览文件 @
bcec92d0
...
...
@@ -40,20 +40,7 @@ class BatchNormOp : public Operator<D, T> {
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
const
T
*
epsilon_ptr
=
epsilon
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
*
epsilon_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
functor_
(
input
,
scale
,
offset
,
mean
,
var
,
epsilon
,
output
);
return
true
;
}
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
bcec92d0
...
...
@@ -24,12 +24,12 @@ static void BatchNorm(
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
D
eviceType
::
CPU
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
T
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
@@ -54,7 +54,8 @@ static void BatchNorm(
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
BM_BATCH_NORM
(
1
,
3
,
128
,
128
,
float
);
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
bcec92d0
...
...
@@ -9,9 +9,10 @@ namespace mace {
class
BatchNormOpTest
:
public
OpsTestBase
{};
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
template
<
DeviceType
D
>
void
Simple
()
{
// Construct graph
auto
&
net
=
test_net
()
;
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
...
...
@@ -23,26 +24,79 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
eviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run
net
.
RunOp
();
net
.
RunOp
(
D
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleNEON
)
{
Simple
<
DeviceType
::
NEON
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleNeon
)
{
TEST_F
(
BatchNormOpTest
,
SimpleRandomNeon
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run NEON
net
.
RunOp
(
DeviceType
::
NEON
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomNeon
)
{
srand
(
time
(
NULL
));
// generate random input
...
...
@@ -74,11 +128,95 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
net
.
RunOp
();
// Check
Tensor
*
expected
=
net
.
GetOutput
(
"Output"
);
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run NEON
net
.
RunOp
(
DeviceType
::
NEON
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run OPENCL
net
.
RunOp
(
DeviceType
::
OPENCL
);
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomOPENCL
)
{
srand
(
time
(
NULL
));
// generate random input
index_t
batch
=
1
+
rand
()
%
10
;
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run OPENCL
net
.
RunOp
(
DeviceType
::
OPENCL
);
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录