Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
野马c
Mace
提交
e2ae6261
Mace
项目概览
野马c
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e2ae6261
编写于
10月 25, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add sync function for opencl test and benchmark.
上级
bcec92d0
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
55 addition
and
37 deletion
+55
-37
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+1
-3
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+7
-4
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+15
-19
mace/kernels/opencl/cl/batch_norm.cl
mace/kernels/opencl/cl/batch_norm.cl
+21
-9
mace/ops/BUILD
mace/ops/BUILD
+1
-2
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+2
-0
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+1
-0
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+7
-0
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
e2ae6261
...
...
@@ -8,11 +8,9 @@
#include <mutex>
#include <dirent.h>
#include <errno.h>
#include "mace/core/logging.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
namespace
mace
{
namespace
{
...
...
@@ -66,7 +64,7 @@ bool BuildProgram(OpenCLRuntime *runtime,
};
*
program
=
cl
::
Program
(
runtime
->
context
(),
sources
);
std
::
string
build_options
=
"-Werror -cl-mad-enable -I"
+
path
;
std
::
string
build_options
=
"-Werror -cl-mad-enable -
cl-fast-relaxed-math -
I"
+
path
;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
if
(
program
->
build
({
runtime
->
device
()},
build_options
.
c_str
())
!=
CL_SUCCESS
)
{
if
(
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_STATUS
>
(
runtime
->
device
())
==
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
e2ae6261
...
...
@@ -20,15 +20,18 @@ namespace mace {
class
OpenCLRuntime
{
public:
static
OpenCLRuntime
*
Get
();
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
CommandQueue
command_queue
);
~
OpenCLRuntime
();
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
CommandQueue
&
command_queue
();
cl
::
Program
&
program
();
private:
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
CommandQueue
command_queue
);
~
OpenCLRuntime
();
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
private:
cl
::
Context
context_
;
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
e2ae6261
...
...
@@ -24,25 +24,21 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
auto
batch_norm_kernel
=
cl
::
KernelFunctor
<
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
int
,
int
,
cl
::
Buffer
>
(
program
,
"batch_norm"
);
cl_int
error
;
auto
res_event
=
batch_norm_kernel
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
cl
::
NDRange
(
n
*
channel
*
sample_size
),
cl
::
NDRange
(
128
)),
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
scale
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
offset
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
mean
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
var
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
epsilon
->
buffer
())),
static_cast
<
int
>
(
channel
),
static_cast
<
int
>
(
sample_size
),
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())),
error
);
res_event
.
wait
();
auto
_kernel
=
cl
::
Kernel
(
program
,
"batch_norm"
);
_kernel
.
setArg
(
0
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
_kernel
.
setArg
(
1
,
*
(
static_cast
<
cl
::
Buffer
*>
(
scale
->
buffer
())));
_kernel
.
setArg
(
2
,
*
(
static_cast
<
cl
::
Buffer
*>
(
offset
->
buffer
())));
_kernel
.
setArg
(
3
,
*
(
static_cast
<
cl
::
Buffer
*>
(
mean
->
buffer
())));
_kernel
.
setArg
(
4
,
*
(
static_cast
<
cl
::
Buffer
*>
(
var
->
buffer
())));
_kernel
.
setArg
(
5
,
*
(
static_cast
<
cl
::
Buffer
*>
(
epsilon
->
buffer
())));
_kernel
.
setArg
(
6
,
static_cast
<
int
>
(
sample_size
));
_kernel
.
setArg
(
7
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
_kernel
.
setArg
(
8
,
32u
,
nullptr
);
_kernel
.
setArg
(
9
,
32u
,
nullptr
);
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
n
,
channel
,
sample_size
),
cl
::
NDRange
(
1
,
1
,
128
));
MACE_CHECK
(
error
==
CL_SUCCESS
);
}
...
...
mace/kernels/opencl/cl/batch_norm.cl
浏览文件 @
e2ae6261
...
...
@@ -4,16 +4,28 @@ void kernel batch_norm(global const float *input,
global
const
float
*mean,
global
const
float
*var,
global
const
float
*epsilon,
private
const
int
channels,
private
const
int
pixels,
global
float
*output
)
{
int
idx
=
get_global_id
(
0
)
;
int
channel
=
(
idx
%
(
channels
*
pixels
))
/
pixels
;
global
float
*output,
__local
float
*new_scale,
__local
float
*new_offset
)
{
const
int
batch
=
get_global_id
(
0
)
;
const
int
channel
=
get_global_id
(
1
)
;
const
int
channels
=
get_global_size
(
1
)
;
const
int
pixel_offset
=
get_global_id
(
2
)
;
const
unsigned
int
local_channel
=
get_local_id
(
1
)
;
const
int
local_pixel_idx
=
get_local_id
(
2
)
;
const
float
*input_ptr
=
input
+
idx
;
const
float
new_scale
=
scale[channel]
*
rsqrt
(
var[channel]
+
*epsilon
)
;
const
float
new_offset
=
offset[channel]
-
mean[channel]
*
new_scale
;
float
*output_ptr
=
output
+
idx
;
*output_ptr
=
new_scale
*
*input_ptr
+
new_offset
;
if
(
local_pixel_idx
==
0
)
{
new_scale[local_channel]
=
scale[channel]
*
rsqrt
(
var[channel]
+
*epsilon
)
;
new_offset[local_channel]
=
offset[channel]
-
mean[channel]
*
new_scale[local_channel]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
const
int
sample_offset
=
(
batch
*
channels
+
channel
)
*
pixels
+
pixel_offset
;
const
float
*input_ptr
=
input
+
sample_offset
;
float
*output_ptr
=
output
+
sample_offset
;
*output_ptr
=
new_scale[local_channel]
*
*input_ptr
+
new_offset[local_channel]
;
}
mace/ops/BUILD
浏览文件 @
e2ae6261
...
...
@@ -17,6 +17,7 @@ cc_library(
],
deps
=
[
"//mace/core"
,
"//mace/core:opencl_runtime"
,
"@gtest//:gtest"
,
],
)
...
...
@@ -39,7 +40,6 @@ cc_library(
"-fopenmp"
,
],
deps
=
[
"//mace/core"
,
"//mace/kernels"
,
"//mace/proto:cc_proto"
,
],
...
...
@@ -72,7 +72,6 @@ cc_test(
deps
=
[
":ops"
,
":test"
,
"//mace/core"
,
"//mace/core:test_benchmark_main"
,
],
)
mace/ops/batch_norm_benchmark.cc
浏览文件 @
e2ae6261
...
...
@@ -34,11 +34,13 @@ static void BatchNorm(
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
}
}
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
e2ae6261
...
...
@@ -208,6 +208,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
// Run NEON
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
// Check
Tensor
expected
;
...
...
mace/ops/ops_test_util.h
浏览文件 @
e2ae6261
...
...
@@ -11,6 +11,7 @@
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
...
...
@@ -152,6 +153,12 @@ class OpsTestNet {
return
ws_
.
GetTensor
(
output_name
);
}
void
Sync
()
{
if
(
net_
)
{
OpenCLRuntime
::
Get
()
->
command_queue
().
finish
();
}
}
public:
Workspace
ws_
;
OperatorDef
op_def_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录