Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
e54825c5
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e54825c5
编写于
4月 10, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'concat-dead' into 'master'
Fix concat test bad performance. See merge request !358
上级
968fedc9
365e4fe3
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
109 addition
and
20 deletion
+109
-20
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+6
-1
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+2
-0
mace/core/runtime/opencl/opencl_wrapper.cc
mace/core/runtime/opencl/opencl_wrapper.cc
+22
-0
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+35
-5
mace/kernels/opencl/slice.cc
mace/kernels/opencl/slice.cc
+35
-8
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+9
-6
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
e54825c5
...
...
@@ -236,7 +236,7 @@ void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
OpenCLRuntime
::
OpenCLRuntime
(
GPUPerfHint
gpu_perf_hint
,
GPUPriorityHint
gpu_priority_hint
)
:
storage_
(
nullptr
)
{
storage_
(
nullptr
)
,
is_profiling_enabled_
(
false
)
{
LoadOpenCLLibrary
();
std
::
vector
<
cl
::
Platform
>
all_platforms
;
...
...
@@ -286,6 +286,7 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
if
(
Tuner
<
uint32_t
>::
Get
()
->
IsTuning
()
||
(
profiling
!=
nullptr
&&
strlen
(
profiling
)
==
1
&&
profiling
[
0
]
==
'1'
))
{
properties
|=
CL_QUEUE_PROFILING_ENABLE
;
is_profiling_enabled_
=
true
;
}
cl_int
err
;
...
...
@@ -590,4 +591,8 @@ const bool OpenCLRuntime::IsOutOfRangeCheckEnabled() const {
return
out_of_range_check_
;
}
const
bool
OpenCLRuntime
::
is_profiling_enabled
()
const
{
return
is_profiling_enabled_
;
}
}
// namespace mace
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
e54825c5
...
...
@@ -77,6 +77,7 @@ class OpenCLRuntime {
const
GPUType
ParseGPUType
(
const
std
::
string
&
device_name
);
const
std
::
string
ParseDeviceVersion
(
const
std
::
string
&
device_version
);
void
SaveBuiltCLProgram
();
const
bool
is_profiling_enabled
()
const
;
private:
OpenCLRuntime
(
GPUPerfHint
,
GPUPriorityHint
);
...
...
@@ -116,6 +117,7 @@ class OpenCLRuntime {
std
::
string
platform_info_
;
bool
program_map_changed_
;
std
::
unique_ptr
<
KVStorage
>
storage_
;
bool
is_profiling_enabled_
;
static
GPUPerfHint
kGPUPerfHint
;
static
GPUPriorityHint
kGPUPriorityHint
;
...
...
mace/core/runtime/opencl/opencl_wrapper.cc
浏览文件 @
e54825c5
...
...
@@ -168,6 +168,11 @@ class OpenCLLibraryImpl final {
size_t
,
void
*
,
size_t
*
);
using
clGetEventInfoFunc
=
cl_int
(
*
)(
cl_event
event
,
cl_event_info
param_name
,
size_t
param_value_size
,
void
*
param_value
,
size_t
*
param_value_size_ret
);
using
clGetEventProfilingInfoFunc
=
cl_int
(
*
)(
cl_event
event
,
cl_profiling_info
param_name
,
size_t
param_value_size
,
...
...
@@ -221,6 +226,7 @@ class OpenCLLibraryImpl final {
MACE_CL_DEFINE_FUNC_PTR
(
clReleaseDevice
);
MACE_CL_DEFINE_FUNC_PTR
(
clRetainEvent
);
MACE_CL_DEFINE_FUNC_PTR
(
clGetKernelWorkGroupInfo
);
MACE_CL_DEFINE_FUNC_PTR
(
clGetEventInfo
);
MACE_CL_DEFINE_FUNC_PTR
(
clGetEventProfilingInfo
);
MACE_CL_DEFINE_FUNC_PTR
(
clGetImageInfo
);
...
...
@@ -344,6 +350,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) {
MACE_CL_ASSIGN_FROM_DLSYM
(
clReleaseDevice
);
MACE_CL_ASSIGN_FROM_DLSYM
(
clRetainEvent
);
MACE_CL_ASSIGN_FROM_DLSYM
(
clGetKernelWorkGroupInfo
);
MACE_CL_ASSIGN_FROM_DLSYM
(
clGetEventInfo
);
MACE_CL_ASSIGN_FROM_DLSYM
(
clGetEventProfilingInfo
);
MACE_CL_ASSIGN_FROM_DLSYM
(
clGetImageInfo
);
...
...
@@ -881,6 +888,21 @@ CL_API_ENTRY cl_int clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 {
return
func
(
event
);
}
// Event API
CL_API_ENTRY
cl_int
clGetEventInfo
(
cl_event
event
,
cl_event_info
param_name
,
size_t
param_value_size
,
void
*
param_value
,
size_t
*
param_value_size_ret
)
CL_API_SUFFIX__VERSION_1_0
{
MACE_CHECK_NOTNULL
(
mace
::
openclLibraryImpl
);
auto
func
=
mace
::
openclLibraryImpl
->
clGetEventInfo
;
MACE_CHECK_NOTNULL
(
func
);
MACE_LATENCY_LOGGER
(
3
,
"clGetEventInfo"
);
return
func
(
event
,
param_name
,
param_value_size
,
param_value
,
param_value_size_ret
);
}
// Profiling APIs
CL_API_ENTRY
cl_int
clGetEventProfilingInfo
(
cl_event
event
,
cl_profiling_info
param_name
,
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
e54825c5
...
...
@@ -137,6 +137,9 @@ static void ConcatN(cl::Kernel *kernel,
const
int
inputs_count
=
input_list
.
size
();
index_t
chan_blk_offset
=
0
;
cl
::
Event
event
;
CallStats
call_stats
{
INT64_MAX
,
0
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
*
kwg_size
/
64
,
8
,
1
};
for
(
int
i
=
0
;
i
<
inputs_count
;
++
i
)
{
const
Tensor
*
input
=
input_list
[
i
];
index_t
input_channel_blk
=
input
->
dim
(
3
)
/
4
;
...
...
@@ -160,18 +163,45 @@ static void ConcatN(cl::Kernel *kernel,
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
chan_blk_offset
+=
input_channel_blk
;
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
*
kwg_size
/
64
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"concat_n_opencl_kernel_"
<<
input_channel_blk
<<
"_"
<<
width
<<
"_"
<<
batch
*
height
;
TuningOrRun3DKernel
(
*
kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
cl_int
error
;
if
(
runtime
->
IsNonUniformWorkgroupsSupported
())
{
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
*
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
}
else
{
std
::
vector
<
uint32_t
>
roundup_gws
(
lws
.
size
());
for
(
size_t
j
=
0
;
j
<
3
;
++
j
)
{
roundup_gws
[
j
]
=
RoundUp
(
gws
[
j
],
lws
[
j
]);
}
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
*
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
roundup_gws
[
0
],
roundup_gws
[
1
],
roundup_gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
}
MACE_CHECK_CL_SUCCESS
(
error
);
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
(
*
kernel_error
)
->
Map
(
nullptr
);
char
*
kerror_code
=
(
*
kernel_error
)
->
mutable_data
<
char
>
();
MACE_CHECK
(
*
kerror_code
==
0
)
<<
"Kernel error code: "
<<
*
kerror_code
;
(
*
kernel_error
)
->
UnMap
();
}
if
(
runtime
->
is_profiling_enabled
())
{
CallStats
tmp_stats
;
runtime
->
GetCallStats
(
event
,
&
tmp_stats
);
call_stats
.
start_micros
=
std
::
min
<
int64_t
>
(
tmp_stats
.
start_micros
,
call_stats
.
start_micros
);
call_stats
.
end_micros
+=
tmp_stats
.
end_micros
-
tmp_stats
.
start_micros
;
}
}
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
,
call_stats
](
CallStats
*
stats
)
{
event
.
wait
();
if
(
stats
!=
nullptr
)
{
stats
->
start_micros
=
call_stats
.
start_micros
;
stats
->
end_micros
=
stats
->
start_micros
+
call_stats
.
end_micros
;
}
};
}
}
...
...
mace/kernels/opencl/slice.cc
浏览文件 @
e54825c5
...
...
@@ -63,13 +63,8 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
kwg_size_
/
64
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"slice_opencl_kernel_"
<<
input
->
dim
(
0
)
<<
"_"
<<
input
->
dim
(
1
)
<<
"_"
<<
input
->
dim
(
2
)
<<
"_"
<<
input_channels
<<
"_"
<<
outputs_count
;
cl
::
Event
event
;
CallStats
call_stats
{
INT64_MAX
,
0
};
for
(
int
i
=
0
;
i
<
outputs_count
;
++
i
)
{
uint32_t
idx
=
0
;
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
...
...
@@ -85,13 +80,45 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
channel_blk
*
i
));
kernel_
.
setArg
(
idx
++
,
*
(
output_list
[
i
]
->
opencl_image
()));
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
cl_int
error
;
if
(
runtime
->
IsNonUniformWorkgroupsSupported
())
{
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel_
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
}
else
{
std
::
vector
<
uint32_t
>
roundup_gws
(
lws
.
size
());
for
(
size_t
j
=
0
;
j
<
3
;
++
j
)
{
roundup_gws
[
j
]
=
RoundUp
(
gws
[
j
],
lws
[
j
]);
}
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel_
,
cl
::
NullRange
,
cl
::
NDRange
(
roundup_gws
[
0
],
roundup_gws
[
1
],
roundup_gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
}
MACE_CHECK_CL_SUCCESS
(
error
);
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
kernel_error_
->
Map
(
nullptr
);
char
*
kerror_code
=
kernel_error_
->
mutable_data
<
char
>
();
MACE_CHECK
(
*
kerror_code
==
0
)
<<
"Kernel error code: "
<<
*
kerror_code
;
kernel_error_
->
UnMap
();
}
if
(
runtime
->
is_profiling_enabled
())
{
CallStats
tmp_stats
;
runtime
->
GetCallStats
(
event
,
&
tmp_stats
);
call_stats
.
start_micros
=
std
::
min
<
int64_t
>
(
tmp_stats
.
start_micros
,
call_stats
.
start_micros
);
call_stats
.
end_micros
+=
tmp_stats
.
end_micros
-
tmp_stats
.
start_micros
;
}
}
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
,
call_stats
](
CallStats
*
stats
)
{
event
.
wait
();
if
(
stats
!=
nullptr
)
{
stats
->
start_micros
=
call_stats
.
start_micros
;
stats
->
end_micros
=
stats
->
start_micros
+
call_stats
.
end_micros
;
}
};
}
}
...
...
mace/ops/concat_test.cc
浏览文件 @
e54825c5
...
...
@@ -151,12 +151,17 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
int
num_inputs
=
shapes
.
size
();
int
concat_axis_size
=
0
;
// Construct graph
std
::
vector
<
std
::
vector
<
float
>>
inputs
(
num_inputs
,
std
::
vector
<
float
>
());
std
::
vector
<
const
float
*>
input_ptrs
(
num_inputs
);
OpsTestNet
net
;
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
const
std
::
string
input_name
=
MakeString
(
"Input"
,
i
);
const
std
::
string
image_name
=
MakeString
(
"InputImage"
,
i
);
concat_axis_size
+=
shapes
[
i
][
axis
];
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
shapes
[
i
]);
GenerateRandomRealTypeData
(
shapes
[
i
],
&
inputs
[
i
]);
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
net
.
AddInputFromArray
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
shapes
[
i
],
inputs
[
i
]);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
&
net
,
input_name
,
image_name
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
}
...
...
@@ -186,17 +191,15 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
Tensor
::
MappingGuard
output_mapper
(
output
);
const
float
*
output_ptr
=
output
->
data
<
float
>
();
const
float
*
output_ptr_end
=
output_ptr
+
output
->
size
();
int
k
=
0
;
while
(
output_ptr
!=
(
output
->
data
<
float
>
()
+
output
->
size
())
)
{
while
(
output_ptr
!=
output_ptr_end
)
{
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
index_t
num_elements
=
std
::
accumulate
(
shapes
[
i
].
begin
()
+
axis
,
shapes
[
i
].
end
(),
1
,
std
::
multiplies
<
index_t
>
());
const
std
::
string
input_name
=
MakeString
(
"Input"
,
i
);
const
Tensor
*
input_tensor
=
net
.
GetTensor
(
input_name
.
data
());
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
const
float
*
input_ptr
=
input_tensor
->
data
<
float
>
()
+
k
*
num_elements
;
const
float
*
input_ptr
=
input_ptrs
[
i
]
+
k
*
num_elements
;
for
(
int
j
=
0
;
j
<
num_elements
;
++
j
)
{
EXPECT_NEAR
(
*
(
input_ptr
+
j
),
*
output_ptr
++
,
1e-2
)
<<
"With index: "
<<
i
<<
", "
<<
j
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录