Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
as350144
Mace
提交
22fdbb98
Mace
项目概览
as350144
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
2
Star
1
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
22fdbb98
编写于
11月 30, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify buffer to image, nchw to nhwc in batch norm op
上级
fd284f6a
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
193 addition
and
97 deletion
+193
-97
mace/BUILD
mace/BUILD
+8
-0
mace/core/BUILD
mace/core/BUILD
+2
-2
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+9
-7
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+6
-11
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+20
-14
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+37
-28
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+0
-1
mace/mace.bzl
mace/mace.bzl
+7
-1
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+101
-33
tools/bazel-adb-run.sh
tools/bazel-adb-run.sh
+3
-0
未找到文件。
mace/BUILD
浏览文件 @
22fdbb98
...
...
@@ -23,3 +23,11 @@ config_setting(
},
visibility
=
[
"//visibility:public"
],
)
config_setting
(
name
=
"is_profiling"
,
define_values
=
{
"profiling"
:
"true"
,
},
visibility
=
[
"//visibility:public"
],
)
mace/core/BUILD
浏览文件 @
22fdbb98
...
...
@@ -7,7 +7,7 @@ package(
licenses
([
"notice"
])
# Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_profiling"
)
cc_library
(
name
=
"opencl_runtime"
,
...
...
@@ -19,7 +19,7 @@ cc_library(
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.h"
,
]),
copts
=
[
"-std=c++11"
],
copts
=
[
"-std=c++11"
]
+
if_profiling
([
"-D__ENABLE_PROFILING"
])
,
deps
=
[
":logging"
,
"@opencl_headers//:opencl20_headers"
,
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
22fdbb98
...
...
@@ -79,14 +79,16 @@ OpenCLRuntime *OpenCLRuntime::Get() {
return
;
}
cl_command_queue_properties
properties
=
0
;
#ifdef __ENABLE_PROFILING
enable_profiling_
=
true
;
profiling_ev_
.
reset
(
new
cl
::
Event
());
properties
=
CL_QUEUE_PROFILING_ENABLE
;
#endif
// a context is like a "runtime link" to the device and platform;
// i.e. communication is possible
cl
::
Context
context
({
gpu_device
});
cl_command_queue_properties
properties
=
0
;
if
(
enable_profiling_
)
{
profiling_ev_
.
reset
(
new
cl
::
Event
());
properties
=
CL_QUEUE_PROFILING_ENABLE
;
}
cl
::
CommandQueue
command_queue
(
context
,
gpu_device
,
properties
);
instance
=
new
OpenCLRuntime
(
context
,
gpu_device
,
command_queue
);
...
...
@@ -104,12 +106,12 @@ cl::Event* OpenCLRuntime::GetDefaultEvent() {
}
cl_ulong
OpenCLRuntime
::
GetEventProfilingStartInfo
()
{
MACE_CHECK
(
enable_profiling_
,
"
should enable profiling first."
);
MACE_CHECK
(
profiling_ev_
,
"is NULL,
should enable profiling first."
);
return
profiling_ev_
->
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
}
cl_ulong
OpenCLRuntime
::
GetEventProfilingEndInfo
()
{
MACE_CHECK
(
enable_profiling_
,
"
should enable profiling first."
);
MACE_CHECK
(
profiling_ev_
,
"is NULL,
should enable profiling first."
);
return
profiling_ev_
->
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
}
...
...
mace/kernels/batch_norm.h
浏览文件 @
22fdbb98
...
...
@@ -28,9 +28,8 @@ struct BatchNormFunctor {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
index_t
ch_pixel_size
=
input
->
dim
(
0
)
*
input
->
dim
(
1
)
*
input
->
dim
(
2
);
const
index_t
channel
=
input
->
dim
(
3
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
scale_mapper
(
scale
);
...
...
@@ -52,15 +51,11 @@ struct BatchNormFunctor {
for
(
index_t
c
=
0
;
c
<
channel
;
++
c
)
{
T
new_scale
=
scale_ptr
[
c
]
/
std
::
sqrt
(
var_ptr
[
c
]
+
*
epsilon_ptr
);
T
new_offset
=
offset_ptr
[
c
]
-
mean_ptr
[
c
]
*
new_scale
;
index_t
pos
=
c
*
sample_size
;
index_t
pos
=
c
;
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
T
*
input_sample_ptr
=
input_ptr
+
pos
;
T
*
output_sample_ptr
=
output_ptr
+
pos
;
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
output_sample_ptr
[
j
]
=
new_scale
*
input_sample_ptr
[
j
]
+
new_offset
;
}
pos
+=
channel
*
sample_size
;
for
(
index_t
i
=
0
;
i
<
ch_pixel_size
;
++
i
)
{
output_ptr
[
pos
]
=
new_scale
*
input_ptr
[
pos
]
+
new_offset
;
pos
+=
channel
;
}
}
}
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
22fdbb98
...
...
@@ -21,32 +21,38 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
const
Tensor
*
epsilon
,
Tensor
*
output
)
{
index_t
pixel_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
index_t
blocks
=
(
pixel_size
+
3
)
/
4
;
const
index_t
batchs
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
input
->
dim
(
0
)),
static_cast
<
uint32_t
>
(
input
->
dim
(
1
)),
static_cast
<
uint32_t
>
(
blocks
)};
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
width_blocks
),
static_cast
<
uint32_t
>
(
height
*
batchs
)};
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
input
->
dtype
()));
auto
bm_kernel
=
runtime
->
BuildKernel
(
"batch_norm"
,
"batch_norm"
,
built_options
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
1
,
kwg_size
};
uint32_t
idx
=
0
;
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
scale
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
offset
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
mean
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
var
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
scale
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
offset
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
mean
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
var
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
epsilon
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
pixel_size
));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
)
*
4
,
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
)
*
4
,
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
width
));
bm_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
bm_kernel
.
setArg
(
idx
++
,
lws
[
0
]
*
sizeof
(
float
)
*
4
,
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
lws
[
0
]
*
sizeof
(
float
)
*
4
,
nullptr
);
auto
params_generator
=
[
&
kwg_size
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
return
{{
1
,
1
,
64
},
...
...
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
22fdbb98
#
include
<common.h>
//
Supported
data
types:
half/float
void
kernel
batch_norm
(
global
const
DATA_TYPE
*
input,
global
const
DATA_TYPE
*
scale,
global
const
DATA_TYPE
*
offset,
global
const
DATA_TYPE
*
mean,
global
const
DATA_TYPE
*
var,
void
kernel
batch_norm
(
__read_only
image2d_t
input,
__read_only
image2d_t
scale,
__read_only
image2d_t
offset,
__read_only
image2d_t
mean,
__read_only
image2d_t
var,
global
const
DATA_TYPE
*epsilon,
private
const
int
pixels
,
global
DATA_TYPE
*
output,
private
const
int
width
,
__write_only
image2d_t
output,
__local
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
*new_scale,
__local
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
*new_offset
)
{
const
int
batch
=
get_global_id
(
0
)
;
const
int
channel
=
get_global_id
(
1
)
;
const
int
channels
=
get_global_size
(
1
)
;
const
int
pixel_offset
=
get_global_id
(
2
)
;
const
int
local_channel
=
get_local_id
(
1
)
;
const
int
local_pixel_idx
=
get_local_id
(
2
)
;
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w_blk
=
get_global_id
(
1
)
;
const
int
hb_blk
=
get_global_id
(
2
)
;
if
(
local_pixel_idx
==
0
)
{
new_scale[local_channel]
=
(
float4
)(
scale[channel]
*
rsqrt
(
var[channel]
+
*epsilon
))
;
new_offset[local_channel]
=
(
float4
)(
offset[channel]
-
mean[channel]
*
new_scale[local_channel].x
)
;
const
int
local_channel
=
get_local_id
(
0
)
;
const
int
local_w_idx
=
get_local_id
(
1
)
;
const
int
local_hb_idx
=
get_local_id
(
2
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
if
(
local_hb_idx
==
0
&&
local_w_idx
==
0
)
{
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
scale4
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
scale,
sampler,
(
int2
)(
ch_blk,
0
))
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
offset4
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
offset,
sampler,
(
int2
)(
ch_blk,
0
))
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
mean4
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
mean,
sampler,
(
int2
)(
ch_blk,
0
))
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
var4
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
var,
sampler,
(
int2
)(
ch_blk,
0
))
;
new_scale[local_channel]
=
scale4
*
rsqrt
(
var4
+
(
VEC_DATA_TYPE
(
DATA_TYPE,
4
))(
*epsilon
))
;
new_offset[local_channel]
=
offset4
-
mean4
*
new_scale[local_channel]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
const
int
image_offset
=
(
batch
*
channels
+
channel
)
*
pixels
+
pixel_offset*4
;
const
DATA_TYPE
*input_ptr
=
input
+
image_offset
;
DATA_TYPE
*output_ptr
=
output
+
image_offset
;
const
int
end
=
(
batch
*
channels
+
channel
+
1
)
*
pixels
;
if
((
image_offset+4
)
>
end
)
{
for
(
int
i
=
image_offset
; i < end; ++i) {
*output_ptr
=
new_scale[local_channel].x
*
*input_ptr
+
new_offset[local_channel].x
;
++input_ptr
;
++output_ptr
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
in[4]
;
const
int
width_pos
=
w_blk
<<
2
;
const
int
pos
=
ch_blk
*
width
+
width_pos
;
if
(
width_pos
+
4
<
width
)
{
for
(
int
i
=
0
; i < 4; ++i) {
in[i]
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
input,
sampler,
(
int2
)(
pos
+
i,
hb_blk
))
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
res
=
in[i]
*
new_scale[local_channel]
+
new_offset[local_channel]
;
CMD_TYPE
(
write_image,
CMD_DATA_TYPE
)(
output,
(
int2
)(
pos
+
i,
hb_blk
)
,
res
)
;
}
}
else
{
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
values
=
vload4
(
0
,
input_ptr
)
;
values
=
values
*
new_scale[local_channel]
+
new_offset[local_channel]
;
vstore4
(
values,
0
,
output_ptr
)
;
for
(
int
i
=
0
; i < width - width_pos; ++i) {
in[i]
=
CMD_TYPE
(
read_image,
CMD_DATA_TYPE
)(
input,
sampler,
(
int2
)(
pos
+
i,
hb_blk
))
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
res
=
in[i]
*
new_scale[local_channel]
+
new_offset[local_channel]
;
CMD_TYPE
(
write_image,
CMD_DATA_TYPE
)(
output,
(
int2
)(
pos
+
i,
hb_blk
)
,
res
)
;
}
}
}
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
22fdbb98
...
...
@@ -21,7 +21,6 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const
index_t
input_channels
=
input
->
dim
(
3
);
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
std
::
set
<
std
::
string
>
built_options
;
...
...
mace/mace.bzl
浏览文件 @
22fdbb98
...
...
@@ -22,4 +22,10 @@ def if_android_arm64(a):
return
select
({
"//mace:android_arm64"
:
a
,
"//conditions:default"
:
[],
})
\ No newline at end of file
})
def
if_profiling
(
a
):
return
select
({
"//mace:is_profiling"
:
a
,
"//conditions:default"
:
[],
})
mace/ops/batch_norm_test.cc
浏览文件 @
22fdbb98
...
...
@@ -5,26 +5,18 @@
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
class
BatchNormOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
void
Simple
()
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
...
...
@@ -32,12 +24,44 @@ void Simple() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
CreateTensor
<
float
>
({
1
,
6
,
2
,
1
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
...
...
@@ -47,14 +71,17 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
Simple
<
DeviceType
::
CPU
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleNEON) {
Simple<DeviceType::NEON>();
}
*/
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
srand(time(NULL));
...
...
@@ -136,6 +163,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-2);
}
*/
TEST_F
(
BatchNormOpTest
,
SimpleRandomOPENCL
)
{
srand
(
time
(
NULL
));
...
...
@@ -145,6 +173,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
64
;
index_t
width
=
64
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
...
...
@@ -158,29 +187,48 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// tuning
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// Tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
unsetenv
(
"MACE_TUNING"
);
// Run on opencl
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
ImageToBuffer
<
DeviceType
::
OPENCL
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
ComplexRandomOPENCL
)
{
...
...
@@ -191,6 +239,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
index_t
channels
=
3
+
rand
()
%
50
;
index_t
height
=
103
;
index_t
width
=
113
;
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
...
...
@@ -204,13 +253,38 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Var"
,
{
channels
},
true
);
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
"Epsilon"
,
{},
{
1e-3
});
// run cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"OffsetImage"
)
.
Input
(
"MeanImage"
)
.
Input
(
"VarImage"
)
.
Input
(
"Epsilon"
)
.
Output
(
"OutputImage"
)
.
Finalize
(
net
.
NewOperatorDef
());
// tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
...
...
@@ -220,14 +294,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run cpu
net
.
RunOp
();
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
ImageToBuffer
<
DeviceType
::
OPENCL
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
tools/bazel-adb-run.sh
浏览文件 @
22fdbb98
...
...
@@ -22,6 +22,9 @@ ANDROID_ABI=arm64-v8a
STRIP
=
""
STRIP
=
"--strip always"
# for profiling
# bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI --define profiling=true
bazel build
-c
opt
$STRIP
--verbose_failures
$BAZEL_TARGET
--crosstool_top
=
//external:android/crosstool
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
--cpu
=
$ANDROID_ABI
if
[
$?
-ne
0
]
;
then
exit
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录