Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
77ea99f5
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
“0436bb8e70aa2408618a3ab934d7d0e1baddf3b6”上不存在“tools/python/micro/jinja2_files/micro_graph_data.h.jinja2”
提交
77ea99f5
编写于
11月 13, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add dynamic build opencl kernel logic.
上级
156c128d
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
96 additions
and
17 deletions
+96
-17
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+64
-1
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+15
-2
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+11
-9
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+5
-5
mace/proto/mace.proto
mace/proto/mace.proto
+1
-0
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
77ea99f5
...
@@ -83,6 +83,7 @@ bool BuildProgram(OpenCLRuntime *runtime,
...
@@ -83,6 +83,7 @@ bool BuildProgram(OpenCLRuntime *runtime,
}
// namespace
}
// namespace
OpenCLRuntime
*
OpenCLRuntime
::
Get
()
{
OpenCLRuntime
*
OpenCLRuntime
::
Get
()
{
static
std
::
once_flag
init_once
;
static
std
::
once_flag
init_once
;
static
OpenCLRuntime
*
instance
=
nullptr
;
static
OpenCLRuntime
*
instance
=
nullptr
;
...
@@ -140,7 +141,10 @@ OpenCLRuntime *OpenCLRuntime::Get() {
...
@@ -140,7 +141,10 @@ OpenCLRuntime *OpenCLRuntime::Get() {
OpenCLRuntime
::
OpenCLRuntime
(
cl
::
Context
context
,
OpenCLRuntime
::
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
Device
device
,
cl
::
CommandQueue
command_queue
)
cl
::
CommandQueue
command_queue
)
:
context_
(
context
),
device_
(
device
),
command_queue_
(
command_queue
)
{}
:
context_
(
context
),
device_
(
device
),
command_queue_
(
command_queue
)
{
const
char
*
kernel_path
=
getenv
(
"MACE_KERNEL_PATH"
);
kernel_path_
=
std
::
string
(
kernel_path
==
nullptr
?
""
:
kernel_path
)
+
"/"
;
}
OpenCLRuntime
::~
OpenCLRuntime
()
{}
OpenCLRuntime
::~
OpenCLRuntime
()
{}
...
@@ -162,6 +166,65 @@ cl::Program &OpenCLRuntime::program() {
...
@@ -162,6 +166,65 @@ cl::Program &OpenCLRuntime::program() {
return
program_
;
return
program_
;
}
}
const
std
::
unodered_map
<
std
::
string
,
std
::
string
>
OpenCLRuntime
::
kernel_program_map_
=
{
{
"BatchNorm"
,
"batch_norm.cl"
}
};
bool
OpenCLRuntime
::
BuildProgram
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
build_options
,
cl
::
Program
*
program
)
{
MACE_CHECK_NOTNULL
(
program
);
cl
::
Program
::
Sources
sources
;
std
::
string
filename
=
kernel_path_
+
kernel_name
;
std
::
string
kernel_source
;
MACE_CHECK
(
ReadSourceFile
(
filename
,
&
kernel_source
));
sources
.
push_back
({
kernel_source
.
c_str
(),
kernel_source
.
length
()});
*
program
=
cl
::
Program
(
this
->
context
(),
sources
);
build_options
+=
" -Werror -cl-mad-enable -cl-fast-relaxed-math -I"
+
path
;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
cl_int
ret
=
program
->
build
({
runtime
->
device
()},
build_options
.
c_str
());
if
(
ret
!=
CL_SUCCESS
)
{
if
(
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_STATUS
>
(
runtime
->
device
())
==
CL_BUILD_ERROR
)
{
std
::
string
build_log
=
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_LOG
>
(
runtime
->
device
());
LOG
(
INFO
)
<<
"Program build log: "
<<
build_log
;
}
LOG
(
FATAL
)
<<
"Build program failed: "
<<
ret
;
}
return
true
;
}
// Returns a cl::Kernel named kernel_name, building its program on demand.
//
// kernel_name:   key into kernel_program_map_; also the kernel's name inside
//                the built program. An unknown name trips MACE_CHECK.
// build_options: compiler options for the program. They are joined, each
//                prefixed by a space, in the set's iteration order.
//
// Built programs are cached in built_program_map_ under the key
// "<program file name><joined options>", so a program is only compiled once
// per distinct option set.
cl::Kernel OpenCLRuntime::BuildKernel(
    const std::string &kernel_name,
    const std::set<std::string> &build_options) {
  const auto kernel_program_it = kernel_program_map_.find(kernel_name);
  MACE_CHECK(kernel_program_it != kernel_program_map_.end(),
             kernel_name, " opencl kernel doesn't exist.");
  const std::string &program_name = kernel_program_it->second;

  std::string build_options_str;
  for (const auto &option : build_options) {
    build_options_str += " " + option;
  }
  const std::string built_program_key = program_name + build_options_str;

  cl::Program program;
  const auto built_program_it = built_program_map_.find(built_program_key);
  if (built_program_it != built_program_map_.end()) {
    program = built_program_it->second;
  } else {
    // Build from the mapped source file, not from the kernel name itself.
    this->BuildProgram(program_name, build_options_str, &program);
    // Store a copy: `program` is still needed below to create the kernel,
    // so it must not be moved from.
    built_program_map_.emplace(built_program_key, program);
  }
  // The OpenCL C++ bindings take the program first, then the kernel name as
  // a C string.
  return cl::Kernel(program, kernel_name.c_str());
}
uint32_t
OpenCLRuntime
::
GetDeviceMaxWorkGroupSize
()
{
uint32_t
OpenCLRuntime
::
GetDeviceMaxWorkGroupSize
()
{
unsigned
long
long
size
=
0
;
unsigned
long
long
size
=
0
;
device_
.
getInfo
(
CL_DEVICE_MAX_WORK_GROUP_SIZE
,
&
size
);
device_
.
getInfo
(
CL_DEVICE_MAX_WORK_GROUP_SIZE
,
&
size
);
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
77ea99f5
...
@@ -7,6 +7,7 @@
...
@@ -7,6 +7,7 @@
#include <map>
#include <map>
#include <mutex>
#include <mutex>
#include <unordered_map>
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
...
@@ -17,12 +18,15 @@ class OpenCLRuntime {
...
@@ -17,12 +18,15 @@ class OpenCLRuntime {
public:
public:
static
OpenCLRuntime
*
Get
();
static
OpenCLRuntime
*
Get
();
uint32_t
GetDeviceMaxWorkGroupSize
();
uint32_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
cl
::
Context
&
context
();
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
Device
&
device
();
cl
::
CommandQueue
&
command_queue
();
cl
::
CommandQueue
&
command_queue
();
cl
::
Program
&
program
();
cl
::
Program
&
program
();
uint32_t
GetDeviceMaxWorkGroupSize
();
uint32_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
cl
::
Kernel
BuildKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
set
<
std
::
string
>
&
build_options
);
private:
private:
OpenCLRuntime
(
cl
::
Context
context
,
OpenCLRuntime
(
cl
::
Context
context
,
cl
::
Device
device
,
cl
::
Device
device
,
...
@@ -31,12 +35,21 @@ class OpenCLRuntime {
...
@@ -31,12 +35,21 @@ class OpenCLRuntime {
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
OpenCLRuntime
&
operator
=
(
const
OpenCLRuntime
&
)
=
delete
;
bool
BuildProgram
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
build_options
,
cl
::
Program
*
program
);
private:
private:
cl
::
Context
context_
;
cl
::
Context
context_
;
cl
::
Device
device_
;
cl
::
Device
device_
;
cl
::
CommandQueue
command_queue_
;
cl
::
CommandQueue
command_queue_
;
cl
::
Program
program_
;
cl
::
Program
program_
;
std
::
once_flag
build_flag_
;
std
::
once_flag
build_flag_
;
std
::
string
kernel_path_
;
static
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
kernel_program_map_
;
mutable
std
::
unordered_map
<
std
::
string
,
cl
::
Program
>
built_program_map_
;
};
};
}
// namespace mace
}
// namespace mace
...
...
mace/kernels/batch_norm.h
浏览文件 @
77ea99f5
...
@@ -76,15 +76,17 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
...
@@ -76,15 +76,17 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
const
Tensor
*
epsilon
,
const
Tensor
*
epsilon
,
Tensor
*
output
);
Tensor
*
output
);
template
<
>
template
<
typename
T
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
struct
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>
{
const
Tensor
*
input
,
void
operator
()(
const
Tensor
*
scale
,
const
Tensor
*
input
,
const
Tensor
*
offset
,
const
Tensor
*
scale
,
const
Tensor
*
mean
,
const
Tensor
*
offset
,
const
Tensor
*
var
,
const
Tensor
*
mean
,
const
Tensor
*
epsilon
,
const
Tensor
*
var
,
Tensor
*
output
);
const
Tensor
*
epsilon
,
Tensor
*
output
);
};
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
77ea99f5
...
@@ -10,8 +10,8 @@
...
@@ -10,8 +10,8 @@
namespace
mace
{
namespace
mace
{
namespace
kernels
{
namespace
kernels
{
template
<
>
template
<
typename
T
>
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
void
BatchNormFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
const
Tensor
*
offset
,
...
@@ -27,10 +27,10 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
...
@@ -27,10 +27,10 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
static_cast
<
uint32_t
>
(
input
->
dim
(
1
)),
static_cast
<
uint32_t
>
(
input
->
dim
(
1
)),
static_cast
<
uint32_t
>
(
blocks
)};
static_cast
<
uint32_t
>
(
blocks
)};
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
std
::
set
<
std
::
string
>
built_options
;
auto
bm_kernel
=
cl
::
Kernel
(
program
,
"batch_norm"
);
built_options
.
emplace
(
"-DDataType="
+
GetDataTypeFromEnum
(
input
->
dtype
()));
auto
bm_kernel
=
runtime
->
CreateKernel
(
"batch_norm"
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
1
,
kwg_size
};
const
std
::
vector
<
uint32_t
>
lws
=
{
1
,
1
,
kwg_size
};
...
...
mace/proto/mace.proto
浏览文件 @
77ea99f5
...
@@ -23,6 +23,7 @@ enum DataType {
...
@@ -23,6 +23,7 @@ enum DataType {
DT_INT64
=
8
;
DT_INT64
=
8
;
DT_UINT16
=
9
;
DT_UINT16
=
9
;
DT_BOOL
=
10
;
DT_BOOL
=
10
;
DT_HALF
=
19
;
}
}
message
TensorProto
{
message
TensorProto
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录