Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
f763da2b
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f763da2b
编写于
7月 17, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Remove unused tensor of GPU.
上级
691331bd
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
125 addition
and
37 deletion
+125
-37
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+6
-0
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+1
-0
mace/core/runtime_failure_mock.cc
mace/core/runtime_failure_mock.cc
+5
-3
mace/core/tensor.h
mace/core/tensor.h
+10
-0
mace/core/workspace.cc
mace/core/workspace.cc
+89
-32
mace/core/workspace.h
mace/core/workspace.h
+2
-0
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+3
-0
mace/libmace/mace.cc
mace/libmace/mace.cc
+4
-1
mace/public/mace_runtime.h
mace/public/mace_runtime.h
+5
-1
未找到文件。
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
f763da2b
...
...
@@ -675,6 +675,12 @@ uint64_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() {
return
size
;
}
// Queries the device for CL_DEVICE_MAX_MEM_ALLOC_SIZE and returns the value
// written by getInfo(). The status returned by getInfo() is not inspected;
// the local starts at 0, so 0 is returned if the query writes nothing.
uint64_t OpenCLRuntime::GetDeviceMaxMemAllocSize() {
  uint64_t max_alloc_size = 0;
  device_->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &max_alloc_size);
  return max_alloc_size;
}
uint64_t
OpenCLRuntime
::
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
)
{
uint64_t
size
=
0
;
kernel
.
getWorkGroupInfo
(
*
device_
,
CL_KERNEL_WORK_GROUP_SIZE
,
&
size
);
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
f763da2b
...
...
@@ -85,6 +85,7 @@ class OpenCLRuntime {
void
GetCallStats
(
const
cl
::
Event
&
event
,
CallStats
*
stats
);
uint64_t
GetDeviceMaxWorkGroupSize
();
uint64_t
GetDeviceMaxMemAllocSize
();
uint64_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
uint64_t
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
);
bool
IsNonUniformWorkgroupsSupported
()
const
;
...
...
mace/core/runtime_failure_mock.cc
浏览文件 @
f763da2b
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <
cstdlib
>
#include <
random
>
#include <string>
#include "mace/core/runtime_failure_mock.h"
...
...
@@ -35,10 +35,12 @@ inline float GetRuntimeFailureRatioFromEnv() {
}
// namespace
bool
ShouldMockRuntimeFailure
()
{
static
unsigned
int
seed
=
time
(
NULL
);
static
float
mock_runtime_failure_ratio
=
GetRuntimeFailureRatioFromEnv
();
if
(
mock_runtime_failure_ratio
>
1e-6
)
{
float
random_ratio
=
rand_r
(
&
seed
)
/
static_cast
<
float
>
(
RAND_MAX
);
std
::
random_device
rd
;
std
::
mt19937
gen
(
rd
());
std
::
uniform_real_distribution
<
float
>
dis
(
0.0
,
1.0
);
float
random_ratio
=
dis
(
gen
);
if
(
random_ratio
<
mock_runtime_failure_ratio
)
{
VLOG
(
0
)
<<
"Mock runtime failure."
;
return
true
;
...
...
mace/core/tensor.h
浏览文件 @
f763da2b
...
...
@@ -105,18 +105,21 @@ class Tensor {
dtype_
(
type
),
buffer_
(
nullptr
),
is_buffer_owner_
(
true
),
unused_
(
false
),
name_
(
""
)
{}
// Constructs a tensor around an externally supplied buffer pointer.
// Stores `buffer` in buffer_ and `dtype` in dtype_, records
// is_buffer_owner_ = false and unused_ = false, and starts with an empty name.
Tensor
(
BufferBase
*
buffer
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_
(
buffer
),
is_buffer_owner_
(
false
),
unused_
(
false
),
name_
(
""
)
{}
// Constructs a tensor from a buffer slice.
// The slice is copied into the buffer_slice_ member and buffer_ is then
// pointed at that member copy (not at the caller's argument).
// is_buffer_owner_ and unused_ both start as false; the name starts empty.
Tensor
(
const
BufferSlice
&
buffer_slice
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_slice_
(
buffer_slice
),
is_buffer_owner_
(
false
),
unused_
(
false
),
name_
(
""
)
{
buffer_
=
&
buffer_slice_
;
}
...
...
@@ -133,6 +136,8 @@ class Tensor {
// Overwrites the stored data type (dtype_) with `dtype`.
inline
void
SetDtype
(
DataType
dtype
)
{
dtype_
=
dtype
;
}
// Returns the current value of the unused_ flag.
inline
bool
unused
()
const
{
return
unused_
;
}
// Returns a const reference to the stored shape vector (shape_).
inline
const
std
::
vector
<
index_t
>
&
shape
()
const
{
return
shape_
;
}
// Returns the number of entries in shape_ (shape_.size()) as an index_t.
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
...
...
@@ -195,6 +200,10 @@ class Tensor {
return
static_cast
<
T
*>
(
buffer_
->
raw_mutable_data
());
}
// Sets the unused_ flag to true; the flag is read back through unused().
inline
void
MarkUnused
()
{
unused_
=
true
;
}
inline
void
Clear
()
{
MACE_CHECK_NOTNULL
(
buffer_
);
buffer_
->
Clear
(
raw_size
());
...
...
@@ -362,6 +371,7 @@ class Tensor {
BufferBase
*
buffer_
;
BufferSlice
buffer_slice_
;
bool
is_buffer_owner_
;
bool
unused_
;
std
::
string
name_
;
MACE_DISABLE_COPY_AND_ASSIGN
(
Tensor
);
...
...
mace/core/workspace.cc
浏览文件 @
f763da2b
...
...
@@ -18,6 +18,9 @@
#include <utility>
#include "mace/core/arg_helper.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif
#include "mace/core/workspace.h"
#include "mace/utils/timer.h"
...
...
@@ -85,6 +88,46 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
VLOG
(
3
)
<<
"Model data size: "
<<
model_data_size
;
if
(
model_data_size
>
0
)
{
#ifdef MACE_ENABLE_OPENCL
if
(
type
==
DeviceType
::
GPU
&&
OpenCLRuntime
::
Global
()
->
GetDeviceMaxMemAllocSize
()
<=
static_cast
<
uint64_t
>
(
model_data_size
))
{
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
MACE_LATENCY_LOGGER
(
2
,
"Load tensor "
,
const_tensor
.
name
());
VLOG
(
3
)
<<
"Tensor name: "
<<
const_tensor
.
name
()
<<
", data type: "
<<
const_tensor
.
data_type
()
<<
", shape: "
<<
MakeString
(
std
::
vector
<
index_t
>
(
const_tensor
.
dims
().
begin
(),
const_tensor
.
dims
().
end
()));
std
::
vector
<
index_t
>
dims
;
for
(
const
index_t
d
:
const_tensor
.
dims
())
{
dims
.
push_back
(
d
);
}
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
const_tensor
.
data_type
()));
tensor
->
Resize
(
dims
);
MACE_CHECK
(
tensor
->
size
()
==
const_tensor
.
data_size
(),
"Tensor's data_size not equal with the shape"
);
MACE_CHECK
(
const_tensor
.
offset
()
+
tensor
->
raw_size
()
<=
model_data_size
,
"buffer offset + length ("
,
const_tensor
.
offset
(),
" + "
,
tensor
->
raw_size
(),
") should <= "
,
model_data_size
);
tensor
->
CopyBytes
(
model_data
+
const_tensor
.
offset
(),
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
()));
tensor_map_
[
const_tensor
.
name
()]
=
std
::
move
(
tensor
);
}
}
else
{
#else
{
#endif
if
(
type
==
DeviceType
::
CPU
)
{
tensor_buffer_
=
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
...
...
@@ -99,8 +142,6 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
0
,
model_data_size
);
tensor_buffer_
->
UnMap
();
}
}
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
MACE_LATENCY_LOGGER
(
2
,
"Load tensor "
,
const_tensor
.
name
());
VLOG
(
3
)
<<
"Tensor name: "
<<
const_tensor
.
name
()
...
...
@@ -113,7 +154,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
}
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
BufferSlice
(
tensor_buffer_
.
get
(),
const_tensor
.
offset
(),
new
Tensor
(
BufferSlice
(
tensor_buffer_
.
get
(),
const_tensor
.
offset
(),
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())),
const_tensor
.
data_type
()));
...
...
@@ -121,6 +163,8 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
tensor
->
Reshape
(
dims
);
tensor_map_
[
const_tensor
.
name
()]
=
std
::
move
(
tensor
);
}
}
}
if
(
type
==
DeviceType
::
CPU
||
type
==
DeviceType
::
GPU
)
{
MaceStatus
status
=
CreateOutputTensorBuffer
(
net_def
,
type
);
...
...
@@ -232,4 +276,17 @@ ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) {
}
}
// Erases every tensor_map_ entry whose tensor reports unused(), then resets
// tensor_buffer_ to null.
void Workspace::RemoveUnusedBuffer() {
  const auto map_end = tensor_map_.end();
  for (auto cursor = tensor_map_.begin(); cursor != map_end;) {
    // Advance the cursor before a possible erase so the loop never continues
    // from the erased position.
    auto candidate = cursor++;
    if (!candidate->second->unused()) {
      continue;
    }
    tensor_map_.erase(candidate);
  }
  tensor_buffer_.reset(nullptr);
}
}
// namespace mace
mace/core/workspace.h
浏览文件 @
f763da2b
...
...
@@ -53,6 +53,8 @@ class Workspace {
ScratchBuffer
*
GetScratchBuffer
(
DeviceType
device_type
);
void
RemoveUnusedBuffer
();
private:
MaceStatus
CreateOutputTensorBuffer
(
const
NetDef
&
net_def
,
DeviceType
device_type
);
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
f763da2b
...
...
@@ -180,6 +180,9 @@ MaceStatus BufferToImageFunctor<DeviceType::GPU, T>::operator()(
};
}
// Mark the buffer unused.
const_cast
<
Tensor
*>
(
buffer
)
->
MarkUnused
();
return
MACE_SUCCESS
;
}
...
...
mace/libmace/mace.cc
浏览文件 @
f763da2b
...
...
@@ -222,6 +222,9 @@ MaceStatus MaceEngine::Impl::Init(
#ifdef MACE_ENABLE_HEXAGON
}
#endif
if
(
device_type_
==
DeviceType
::
GPU
)
{
ws_
->
RemoveUnusedBuffer
();
}
return
MaceStatus
::
MACE_SUCCESS
;
}
...
...
@@ -240,7 +243,7 @@ MaceStatus MaceEngine::Impl::Init(
}
model_data_
=
LoadModelData
(
model_data_file
,
model_data_size_
);
Init
(
net_def
,
input_nodes
,
output_nodes
,
model_data_
);
MACE_RETURN_IF_ERROR
(
Init
(
net_def
,
input_nodes
,
output_nodes
,
model_data_
)
);
if
(
device_type_
==
DeviceType
::
GPU
||
device_type_
==
DeviceType
::
HEXAGON
)
{
UnloadModelData
(
model_data_
,
model_data_size_
);
...
...
mace/public/mace_runtime.h
浏览文件 @
f763da2b
...
...
@@ -66,6 +66,7 @@ class KVStorageFactory {
class
__attribute__
((
visibility
(
"default"
)))
FileStorageFactory
:
public
KVStorageFactory
{
public:
// Make sure your app has read and write permission for the path.
explicit
FileStorageFactory
(
const
std
::
string
&
path
);
~
FileStorageFactory
();
...
...
@@ -77,7 +78,10 @@ class __attribute__((visibility("default"))) FileStorageFactory
std
::
unique_ptr
<
Impl
>
impl_
;
};
// Set KV store factory used as OpenCL cache. (Call Once)
// Set Key-Value store factory. (Call Once)
// Now KVStorage is used to store the built OpenCL binaries to file,
// which could speed up the GPU initialization and first run.
// If this API is not called, GPU initialization may be slow.
__attribute__
((
visibility
(
"default"
)))
void
SetKVStorageFactory
(
std
::
shared_ptr
<
KVStorageFactory
>
storage_factory
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录