Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
ed99a2d1
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ed99a2d1
编写于
10月 30, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Change the time statistic strategy for tunner.
上级
1bbf62ee
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
43 addition
and
64 deletion
+43
-64
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+18
-35
mace/utils/tuner.h
mace/utils/tuner.h
+24
-29
mace/utils/tuner_test.cc
mace/utils/tuner_test.cc
+1
-0
未找到文件。
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
ed99a2d1
...
...
@@ -44,42 +44,25 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
),
nullptr
);
bm_kernel
.
setArg
(
idx
++
,
lws
[
1
]
*
sizeof
(
float
),
nullptr
);
std
::
function
<
std
::
vector
<
std
::
vector
<
uint32_t
>>
()
>
params_generator
=
nullptr
;
std
::
function
<
cl_int
(
const
std
::
vector
<
uint32_t
>&
params
)
>
func
;
if
(
Tuning
())
{
params_generator
=
[
&
kwg_size
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
return
{{
1
,
1
,
64
},
{
1
,
1
,
128
},
{
1
,
kwg_size
/
32
,
32
},
{
1
,
kwg_size
/
64
,
64
},
{
1
,
kwg_size
/
128
,
128
},
{
1
,
1
,
kwg_size
},
{
1
,
kwg_size
,
1
}};
};
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
bm_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
],
params
[
2
]),
nullptr
,
&
event
);
auto
params_generator
=
[
&
kwg_size
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
return
{{
1
,
1
,
64
},
{
1
,
1
,
128
},
{
1
,
kwg_size
/
16
,
16
},
{
1
,
kwg_size
/
32
,
32
},
{
1
,
kwg_size
/
64
,
64
},
{
1
,
kwg_size
/
128
,
128
},
{
1
,
1
,
kwg_size
},
{
1
,
kwg_size
,
1
}};
};
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
bm_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
],
params
[
2
]));
MACE_CHECK
(
error
==
CL_SUCCESS
);
event
.
wait
();
return
error
;
};
}
else
{
func
=
[
&
](
const
std
::
vector
<
uint32_t
>&
params
)
->
cl_int
{
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
bm_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
],
params
[
2
]));
MACE_CHECK
(
error
==
CL_SUCCESS
);
return
error
;
};
}
MACE_CHECK
(
error
==
CL_SUCCESS
);
return
error
;
};
std
::
stringstream
ss
;
ss
<<
"batch_norm_opencl_kernel_"
<<
input
->
dim
(
0
)
<<
"_"
...
...
mace/utils/tuner.h
浏览文件 @
ed99a2d1
...
...
@@ -10,18 +10,14 @@
#include <string>
#include <unordered_map>
#include <fstream>
#include <thread>
#include <limits>
#include "mace/core/logging.h"
#include "mace/utils/utils.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
bool
Tuning
()
{
const
char
*
tuning
=
getenv
(
"MACE_TUNING"
);
return
tuning
!=
nullptr
&&
tuning
[
0
]
==
'1'
;
}
template
<
typename
param_type
>
class
Tuner
{
...
...
@@ -34,22 +30,22 @@ class Tuner {
template
<
typename
RetType
>
RetType
TuneOrRun
(
const
std
::
string
param_key
,
const
std
::
vector
<
param_type
>
&
default_param
,
const
std
::
function
<
std
::
vector
<
std
::
vector
<
param_type
>>
()
>
param_generator
,
const
std
::
function
<
RetType
(
const
std
::
vector
<
param_type
>
&
)
>
&
func
)
{
const
std
::
function
<
std
::
vector
<
std
::
vector
<
param_type
>>
()
>
&
param_generator
,
const
std
::
function
<
RetType
(
const
std
::
vector
<
param_type
>
&
)
>
&
func
)
{
if
(
param_generator
==
nullptr
)
{
if
(
IsTuning
())
{
// tune
std
::
vector
<
param_type
>
opt_param
=
default_param
;
RetType
res
=
Tune
<
RetType
>
(
param_generator
,
func
,
opt_param
);
param_table_
[
param_key
]
=
opt_param
;
return
res
;
}
else
{
// run
if
(
param_table_
.
find
(
param_key
)
!=
param_table_
.
end
())
{
return
func
(
param_table_
[
param_key
]);
}
else
{
return
func
(
default_param
);
}
}
else
{
// tune
std
::
vector
<
param_type
>
opt_param
=
default_param
;
RetType
res
=
Tune
<
RetType
>
(
param_generator
,
func
,
opt_param
);
param_table_
[
param_key
]
=
opt_param
;
return
res
;
}
}
...
...
@@ -66,6 +62,11 @@ class Tuner {
Tuner
(
const
Tuner
&
)
=
delete
;
Tuner
&
operator
=
(
const
Tuner
&
)
=
delete
;
inline
bool
IsTuning
()
{
const
char
*
tuning
=
getenv
(
"MACE_TUNING"
);
return
tuning
!=
nullptr
&&
strlen
(
tuning
)
==
1
&&
tuning
[
0
]
==
'1'
;
}
inline
void
WriteRunParameters
()
{
VLOG
(
0
)
<<
path_
;
if
(
path_
!=
nullptr
)
{
...
...
@@ -127,24 +128,18 @@ class Tuner {
inline
RetType
Run
(
const
std
::
function
<
RetType
(
const
std
::
vector
<
param_type
>
&
)
>
&
func
,
const
std
::
vector
<
param_type
>
&
params
,
int
num_runs
,
int64_t
sleep_millisecond
,
double
&
time_us
)
{
RetType
res
;
int64_t
total_time_us
=
0
;
int64_t
actual_num_runs
=
0
;
bool
util_max_time
=
(
num_runs
<=
0
);
for
(
int
i
=
0
;
util_max_time
||
i
<
num_runs
;
++
i
)
{
const
int64_t
start_time
=
NowInMicroSec
();
const
int64_t
start_time
=
NowInMicroSec
();
for
(
int
i
=
0
;
i
<
num_runs
;
++
i
)
{
res
=
func
(
params
);
const
int64_t
end_time
=
NowInMicroSec
();
total_time_us
+=
end_time
-
start_time
;
++
(
actual_num_runs
);
if
(
sleep_millisecond
>
0
)
{
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
sleep_millisecond
));
}
}
time_us
=
total_time_us
*
1.0
/
actual_num_runs
;
OpenCLRuntime
::
Get
()
->
command_queue
().
finish
();
const
int64_t
end_time
=
NowInMicroSec
();
total_time_us
+=
end_time
-
start_time
;
time_us
=
total_time_us
*
1.0
/
num_runs
;
return
res
;
}
...
...
@@ -158,10 +153,10 @@ class Tuner {
for
(
const
auto
&
param
:
params
)
{
double
tmp_time
=
0.0
;
// warm up
Run
<
RetType
>
(
func
,
param
,
2
,
10
,
tmp_time
);
Run
<
RetType
>
(
func
,
param
,
2
,
tmp_time
);
// run
RetType
tmp_res
=
Run
<
RetType
>
(
func
,
param
,
10
,
10
,
tmp_time
);
RetType
tmp_res
=
Run
<
RetType
>
(
func
,
param
,
10
,
tmp_time
);
// Check the execution time
if
(
tmp_time
<
opt_time
)
{
...
...
mace/utils/tuner_test.cc
浏览文件 @
ed99a2d1
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <thread>
#include "gtest/gtest.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录