Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
as350144
Mace
提交
0c11ff97
Mace
项目概览
as350144
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
2
Star
1
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0c11ff97
编写于
12月 04, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'bm_to_image' into 'master'
Bm to image See merge request !132
上级
ef167287
a9832dfb
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
249 addition
and
142 deletion
+249
-142
mace/BUILD
mace/BUILD
+8
-0
mace/core/BUILD
mace/core/BUILD
+2
-2
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+9
-7
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+31
-22
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+27
-19
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+22
-37
mace/mace.bzl
mace/mace.bzl
+7
-1
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+6
-1
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+30
-13
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+103
-39
tools/bazel-adb-run.sh
tools/bazel-adb-run.sh
+4
-1
未找到文件。
mace/BUILD
浏览文件 @
0c11ff97
...
...
@@ -23,3 +23,11 @@ config_setting(
},
visibility
=
[
"//visibility:public"
],
)
config_setting
(
name
=
"is_profiling"
,
define_values
=
{
"profiling"
:
"true"
,
},
visibility
=
[
"//visibility:public"
],
)
mace/core/BUILD
浏览文件 @
0c11ff97
...
...
@@ -7,7 +7,7 @@ package(
licenses
([
"notice"
])
# Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_profiling"
)
cc_library
(
name
=
"opencl_runtime"
,
...
...
@@ -19,7 +19,7 @@ cc_library(
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.h"
,
]),
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
]
+
if_profiling
([
"-D__ENABLE_PROFILING"
])
,
deps
=
[
":logging"
,
"@opencl_headers//:opencl20_headers"
,
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
0c11ff97
...
...
@@ -79,14 +79,16 @@ OpenCLRuntime *OpenCLRuntime::Get() {
return
;
}
cl_command_queue_properties
properties
=
0
;
#ifdef __ENABLE_PROFILING
enable_profiling_
=
true
;
profiling_ev_
.
reset
(
new
cl
::
Event
());
properties
=
CL_QUEUE_PROFILING_ENABLE
;
#endif
// a context is like a "runtime link" to the device and platform;
// i.e. communication is possible
cl
::
Context
context
({
gpu_device
});
cl_command_queue_properties
properties
=
0
;
if
(
enable_profiling_
)
{
profiling_ev_
.
reset
(
new
cl
::
Event
());
properties
=
CL_QUEUE_PROFILING_ENABLE
;
}
cl
::
CommandQueue
command_queue
(
context
,
gpu_device
,
properties
);
instance
=
new
OpenCLRuntime
(
context
,
gpu_device
,
command_queue
);
...
...
@@ -104,12 +106,12 @@ cl::Event* OpenCLRuntime::GetDefaultEvent() {
}
cl_ulong
OpenCLRuntime
::
GetEventProfilingStartInfo
()
{
MACE_CHECK
(
enable_profiling_
,
"
should enable profiling first."
);
MACE_CHECK
(
profiling_ev_
,
"is NULL,
should enable profiling first."
);
return
profiling_ev_
->
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
}
cl_ulong
OpenCLRuntime
::
GetEventProfilingEndInfo
()
{
MACE_CHECK
(
enable_profiling_
,
"
should enable profiling first."
);
MACE_CHECK
(
profiling_ev_
,
"is NULL,
should enable profiling first."
);
return
profiling_ev_
->
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
}
...
...
mace/kernels/batch_norm.h
浏览文件 @
0c11ff97
...
...
@@ -28,9 +28,10 @@ struct BatchNormFunctor {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
scale_mapper
(
scale
);
...
...
@@ -48,19 +49,26 @@ struct BatchNormFunctor {
const
T
*
epsilon_ptr
=
epsilon
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
vector
<
T
>
new_scale
(
channels
);
vector
<
T
>
new_offset
(
channels
);
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
T
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
T
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
new_scale
[
c
]
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
new_offset
[
c
]
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
[
c
];
}
index_t
pos
=
0
;
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
T
*
input_sample_ptr
=
input_ptr
+
pos
;
T
*
output_sample_ptr
=
output_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
#pragma omp parallel for
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
<
width
;
++
w
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
output_ptr
[
pos
]
=
new_scale
[
c
]
*
input_ptr
[
pos
]
+
new_offset
[
c
];
++
pos
;
}
}
pos
+=
channel
*
sample_size
;
}
}
}
...
...
@@ -76,15 +84,16 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
const
Tensor
*
epsilon
,
Tensor
*
output
);
template
<
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
template
<
typename
T
>
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>
{
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
mean
,
const
Tensor
*
var
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
};
}
// namepsace kernels
}
// namespace mace
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
0c11ff97
...
...
@@ -11,8 +11,8 @@
namespace
mace
{
namespace
kernels
{
template
<
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
template
<
typename
T
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
...
...
@@ -21,35 +21,39 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
index_t
pixel_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
index_t
blocks
=
(
pixel_size
+
3
)
/
4
;
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
input
->
dim
(
0
)),
static_cast
<
uint32_t
>
(
input
->
dim
(
1
)),
static_cast
<
uint32_t
>
(
blocks
)};
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
height
*
batch
)};
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
input
->
dtype
()));
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
auto
bm_kernel
=
runtime
->
BuildKernel
(
"batch_norm"
,
"batch_norm"
,
built_options
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
1
,
kwg_size
};
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
kwg_size
,
1
};
uint32_t
idx
=
0
;
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
scale
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
offset
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
mean
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
var
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
scale
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
offset
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
mean
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
var
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
epsilon
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
pixel_size
));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
)
*
4
,
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
)
*
4
,
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
auto
params_generator
=
[
&
kwg_size
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
return
{{
1
,
1
,
64
},
return
{{
8
,
128
,
1
},
//SNPE size
{
1
,
1
,
64
},
{
1
,
1
,
128
},
{
1
,
kwg_size
/
16
,
16
},
{
1
,
kwg_size
/
32
,
32
},
...
...
@@ -80,5 +84,9 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
func
);
}
template
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
0c11ff97
#
include
<common.h>
//
Supported
data
types:
half/float
void
kernel
batch_norm
(
global
const
DATA_TYPE
*input,
global
const
DATA_TYPE
*scale,
global
const
DATA_TYPE
*offset,
global
const
DATA_TYPE
*mean,
global
const
DATA_TYPE
*var,
global
const
DATA_TYPE
*epsilon,
private
const
int
pixels,
global
DATA_TYPE
*output,
__local
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
*new_scale,
__local
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
*new_offset
)
{
const
int
batch
=
get_global_id
(
0
)
;
const
int
channel
=
get_global_id
(
1
)
;
const
int
channels
=
get_global_size
(
1
)
;
const
int
pixel_offset
=
get_global_id
(
2
)
;
const
int
local_channel
=
get_local_id
(
1
)
;
const
int
local_pixel_idx
=
get_local_id
(
2
)
;
__kernel
void
batch_norm
(
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
__read_only
image2d_t
mean,
__read_only
image2d_t
var,
__global
const
DATA_TYPE
*epsilon,
__write_only
image2d_t
output
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
width
=
get_global_size
(
1
)
;
if
(
local_pixel_idx
==
0
)
{
new_scale[local_channel]
=
(
float4
)(
scale[channel]
*
rsqrt
(
var[channel]
+
*epsilon
))
;
new_offset[local_channel]
=
(
float4
)(
offset[channel]
-
mean[channel]
*
new_scale[local_channel].x
)
;
}
DATA_TYPE4
scale_value
=
READ_IMAGET
(
scale,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
offset_value
=
READ_IMAGET
(
offset,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
DATA_TYPE4
mean_value
=
READ_IMAGET
(
mean,
SAMPLER,
(
int2
)(
ch_blk,
0
)
)
;
DATA_TYPE4
var_value
=
READ_IMAGET
(
var,
SAMPLER,
(
int2
)(
ch_blk,
0
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
DATA_TYPE4
new_scale
=
scale_value
*
rsqrt
(
var_value
+
(
DATA_TYPE4
)(
*epsilon
))
;
DATA_TYPE4
new_offset
=
offset_value
-
mean_value
*
new_scale
;
const
int
image_offset
=
(
batch
*
channels
+
channel
)
*
pixels
+
pixel_offset*4
;
const
DATA_TYPE
*input_ptr
=
input
+
image_offset
;
DATA_TYPE
*output_ptr
=
output
+
image_offset
;
const
int
end
=
(
batch
*
channels
+
channel
+
1
)
*
pixels
;
if
((
image_offset+4
)
>
end
)
{
for
(
int
i
=
image_offset
; i < end; ++i) {
*output_ptr
=
new_scale[local_channel].x
*
*input_ptr
+
new_offset[local_channel].x
;
++input_ptr
;
++output_ptr
;
}
}
else
{
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
values
=
vload4
(
0
,
input_ptr
)
;
values
=
values
*
new_scale[local_channel]
+
new_offset[local_channel]
;
vstore4
(
values,
0
,
output_ptr
)
;
}
}
const
int
pos
=
ch_blk
*
width
+
w
;
DATA_TYPE4
in
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
pos,
hb
))
;
DATA_TYPE4
out
=
in
*
new_scale
+
new_offset
;
WRITE_IMAGET
(
output,
(
int2
)(
pos,
hb
)
,
out
)
;
}
mace/mace.bzl
浏览文件 @
0c11ff97
...
...
@@ -22,4 +22,10 @@ def if_android_arm64(a):
return
select
({
"//mace:android_arm64"
:
a
,
"//conditions:default"
:
[],
})
\ No newline at end of file
})
def
if_profiling
(
a
):
return
select
({
"//mace:is_profiling"
:
a
,
"//conditions:default"
:
[],
})
mace/ops/batch_norm.cc
浏览文件 @
0c11ff97
...
...
@@ -23,4 +23,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm")
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
}
// namespace mace
\ No newline at end of file
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/batch_norm_benchmark.cc
浏览文件 @
0c11ff97
...
...
@@ -13,28 +13,45 @@ static void BatchNorm(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
if
(
D
==
OPENCL
)
OpenCLRuntime
::
EnableProfiling
();
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
D
);
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
0c11ff97
...
...
@@ -11,20 +11,10 @@ class BatchNormOpTest : public OpsTestBase {};
template
<
DeviceType
D
>
void
Simple
()
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
...
...
@@ -32,12 +22,44 @@ void Simple() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
CreateTensor
<
float
>
({
1
,
6
,
2
,
1
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
...
...
@@ -47,14 +69,17 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
Simple
<
DeviceType
::
CPU
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleNEON) {
Simple<DeviceType::NEON>();
}
*/
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
srand(time(NULL));
...
...
@@ -136,6 +161,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-2);
}
*/
TEST_F
(
BatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
...
...
@@ -145,6 +171,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
...
...
@@ -158,30 +185,48 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// TODO : there is a bug for tuning
// tuning
// setenv("MACE_TUNING", "1", 1);
// net.RunOp(DeviceType::OPENCL);
// unsetenv("MACE_TUNING");
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
unsetenv
(
"MACE_TUNING"
);
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomOPENCL
)
{
...
...
@@ -192,6 +237,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
...
...
@@ -205,31 +251,49 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// TODO : there is a bug for tuning
// tuning
// setenv("MACE_TUNING", "1", 1);
// net.RunOp(DeviceType::OPENCL);
// unsetenv("MACE_TUNING");
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
unsetenv
(
"MACE_TUNING"
);
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
tools/bazel-adb-run.sh
浏览文件 @
0c11ff97
...
...
@@ -22,7 +22,10 @@ ANDROID_ABI=arm64-v8a
STRIP
=
""
STRIP
=
"--strip always"
bazel build
-c
opt
$STRIP
--verbose_failures
$BAZEL_TARGET
--crosstool_top
=
//external:android/crosstool
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
--cpu
=
$ANDROID_ABI
# for profiling
bazel build
-c
opt
$STRIP
--verbose_failures
$BAZEL_TARGET
--crosstool_top
=
//external:android/crosstool
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
--cpu
=
$ANDROID_ABI
--define
profiling
=
true
#bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI
if
[
$?
-ne
0
]
;
then
exit
1
fi
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录