Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
91f13066
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
91f13066
编写于
3月 21, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'code-style' into 'master'
Fix opencl runtime code format. See merge request !317
上级
ce9b5e33
201983c6
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
18 addition
and
14 deletion
+18
-14
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+7
-7
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+3
-3
mace/kernels/opencl/fully_connected_opencl.cc
mace/kernels/opencl/fully_connected_opencl.cc
+4
-2
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+4
-2
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
91f13066
...
...
@@ -323,23 +323,23 @@ void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) {
}
}
uint
32
_t
OpenCLRuntime
::
GetDeviceMaxWorkGroupSize
()
{
uint
64
_t
OpenCLRuntime
::
GetDeviceMaxWorkGroupSize
()
{
uint64_t
size
=
0
;
device_
->
getInfo
(
CL_DEVICE_MAX_WORK_GROUP_SIZE
,
&
size
);
return
s
tatic_cast
<
uint32_t
>
(
size
)
;
return
s
ize
;
}
uint
32
_t
OpenCLRuntime
::
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
)
{
uint
64
_t
OpenCLRuntime
::
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
)
{
uint64_t
size
=
0
;
kernel
.
getWorkGroupInfo
(
*
device_
,
CL_KERNEL_WORK_GROUP_SIZE
,
&
size
);
return
s
tatic_cast
<
uint32_t
>
(
size
)
;
return
s
ize
;
}
// TODO(liuqi): not compatible with mali gpu.
uint
32
_t
OpenCLRuntime
::
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
)
{
u
nsigned
long
long
size
=
0
;
uint
64
_t
OpenCLRuntime
::
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
)
{
u
int64_t
size
=
0
;
kernel
.
getWorkGroupInfo
(
*
device_
,
CL_KERNEL_WAVE_SIZE_QCOM
,
&
size
);
return
s
tatic_cast
<
uint32_t
>
(
size
)
;
return
s
ize
;
}
}
// namespace mace
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
91f13066
...
...
@@ -46,9 +46,9 @@ class OpenCLRuntime {
cl
::
CommandQueue
&
command_queue
();
void
GetCallStats
(
const
cl
::
Event
&
event
,
CallStats
*
stats
);
uint
32
_t
GetDeviceMaxWorkGroupSize
();
uint
32
_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
uint
32
_t
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
);
uint
64
_t
GetDeviceMaxWorkGroupSize
();
uint
64
_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
uint
64
_t
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
);
cl
::
Kernel
BuildKernel
(
const
std
::
string
&
program_name
,
const
std
::
string
&
kernel_name
,
const
std
::
set
<
std
::
string
>
&
build_options
);
...
...
mace/kernels/opencl/fully_connected_opencl.cc
浏览文件 @
91f13066
...
...
@@ -62,11 +62,13 @@ void FCWXKernel(cl::Kernel *kernel,
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
output_size
=
output
->
dim
(
3
);
const
index_t
output_blocks
=
RoundUpDiv4
(
output_size
);
const
uint32_t
wave_size
=
runtime
->
GetKernelWaveSize
(
*
kernel
);
const
uint32_t
wave_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelWaveSize
(
*
kernel
));
*
gws
=
{
4
,
(
wave_size
/
4
),
static_cast
<
uint32_t
>
(
batch
*
output_blocks
)};
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
*
kernel
);
const
uint32_t
kwg_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
*
kernel
));
const
uint32_t
inter_local_blks
=
kwg_size
/
((
*
gws
)[
0
]
*
(
*
gws
)[
1
]);
*
lws
=
{(
*
gws
)[
0
],
(
*
gws
)[
1
],
inter_local_blks
};
}
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
91f13066
...
...
@@ -201,7 +201,8 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel,
StatsFuture
*
future
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
params_generator
=
[
&
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
);
const
uint32_t
kwg_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
));
std
::
vector
<
uint32_t
>
local_ws
(
3
,
0
);
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
gws
[
0
],
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
gws
[
1
],
kwg_size
/
local_ws
[
0
]);
...
...
@@ -304,7 +305,8 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel,
StatsFuture
*
future
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
params_generator
=
[
&
]()
->
std
::
vector
<
std
::
vector
<
uint32_t
>>
{
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
);
const
uint32_t
kwg_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel
));
uint32_t
local_ws
[
2
];
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
gws
[
0
],
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
gws
[
1
],
kwg_size
/
local_ws
[
0
]);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录