Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
6ed08429
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6ed08429
编写于
12月 04, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update variable name and local group size in batch norm
上级
ffdae79f
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
22 addition
and
25 deletion
+22
-25
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+8
-7
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+5
-4
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+9
-12
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+0
-2
未找到文件。
mace/kernels/batch_norm.h
浏览文件 @
6ed08429
...
...
@@ -28,10 +28,9 @@ struct BatchNormFunctor {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
const
index_t
batch
s
=
input
->
dim
(
0
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
height_width
=
height
*
width
;
const
index_t
channels
=
input
->
dim
(
3
);
Tensor
::
MappingGuard
input_mapper
(
input
);
...
...
@@ -62,11 +61,13 @@ struct BatchNormFunctor {
index_t
pos
=
0
;
#pragma omp parallel for
for
(
index_t
n
=
0
;
n
<
batchs
;
++
n
)
{
for
(
index_t
hb
=
0
;
hb
<
height_width
;
++
hb
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
output_ptr
[
pos
]
=
new_scale
[
c
]
*
input_ptr
[
pos
]
+
new_offset
[
c
];
++
pos
;
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
<
width
;
++
w
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
output_ptr
[
pos
]
=
new_scale
[
c
]
*
input_ptr
[
pos
]
+
new_offset
[
c
];
++
pos
;
}
}
}
}
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
6ed08429
...
...
@@ -21,7 +21,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
const
index_t
batch
s
=
input
->
dim
(
0
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
...
...
@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
s
)};
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
...
...
@@ -40,7 +40,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
auto
bm_kernel
=
runtime
->
BuildKernel
(
"batch_norm"
,
"batch_norm"
,
built_options
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
1
,
kwg_size
};
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
kwg_size
,
1
};
uint32_t
idx
=
0
;
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
...
...
@@ -52,7 +52,8 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
auto
params_generator
=
[
&
kwg_size
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
return
{{
1
,
1
,
64
},
return
{{
8
,
128
,
1
},
//SNPE size
{
1
,
1
,
64
},
{
1
,
1
,
128
},
{
1
,
kwg_size
/
16
,
16
},
{
1
,
kwg_size
/
32
,
32
},
...
...
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
6ed08429
...
...
@@ -8,24 +8,21 @@ __kernel void batch_norm(__read_only image2d_t input,
__global
const
DATA_TYPE
*epsilon,
__write_only
image2d_t
output
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
_blk
=
get_global_id
(
1
)
;
const
int
hb
_blk
=
get_global_id
(
2
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
width
=
get_global_size
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
DATA_TYPE4
scale_value
=
READ_IMAGET
(
scale,
sampler,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
offset_value
=
READ_IMAGET
(
offset,
sampler,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
mean_value
=
READ_IMAGET
(
mean,
sampler,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
var_value
=
READ_IMAGET
(
var,
sampler,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
scale_value
=
READ_IMAGET
(
scale,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
offset_value
=
READ_IMAGET
(
offset,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
mean_value
=
READ_IMAGET
(
mean,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
var_value
=
READ_IMAGET
(
var,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
new_scale
=
scale_value
*
rsqrt
(
var_value
+
(
DATA_TYPE4
)(
*epsilon
))
;
DATA_TYPE4
new_offset
=
offset_value
-
mean_value
*
new_scale
;
const
int
pos
=
ch_blk
*
width
+
w
_blk
;
const
int
pos
=
ch_blk
*
width
+
w
;
DATA_TYPE4
in
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
pos,
hb_blk
))
;
DATA_TYPE4
in
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
pos,
hb
))
;
DATA_TYPE4
out
=
in
*
new_scale
+
new_offset
;
WRITE_IMAGET
(
output,
(
int2
)(
pos,
hb
_blk
)
,
out
)
;
WRITE_IMAGET
(
output,
(
int2
)(
pos,
hb
)
,
out
)
;
}
mace/ops/batch_norm_test.cc
浏览文件 @
6ed08429
...
...
@@ -5,8 +5,6 @@
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
class
BatchNormOpTest
:
public
OpsTestBase
{};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录