Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
bcbcbf7a
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
bcbcbf7a
编写于
3月 05, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor tensor, accelerate gpu initialization time
上级
d548b461
变更
24
显示空白变更内容
内联
并排
Showing
24 changed file
with
623 addition
and
526 deletion
+623
-526
mace/core/allocator.h
mace/core/allocator.h
+18
-16
mace/core/buffer.h
mace/core/buffer.h
+356
-0
mace/core/mace.cc
mace/core/mace.cc
+0
-1
mace/core/net.cc
mace/core/net.cc
+4
-4
mace/core/net.h
mace/core/net.h
+3
-3
mace/core/preallocated_pooled_allocator.h
mace/core/preallocated_pooled_allocator.h
+17
-7
mace/core/runtime/hexagon/hexagon_control_wrapper.h
mace/core/runtime/hexagon/hexagon_control_wrapper.h
+0
-3
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+12
-12
mace/core/runtime/opencl/opencl_allocator.h
mace/core/runtime/opencl/opencl_allocator.h
+8
-8
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
...re/runtime/opencl/opencl_preallocated_pooled_allocator.cc
+0
-30
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
...ore/runtime/opencl/opencl_preallocated_pooled_allocator.h
+0
-45
mace/core/serializer.cc
mace/core/serializer.cc
+0
-82
mace/core/serializer.h
mace/core/serializer.h
+0
-29
mace/core/tensor.h
mace/core/tensor.h
+93
-199
mace/core/workspace.cc
mace/core/workspace.cc
+64
-48
mace/core/workspace.h
mace/core/workspace.h
+11
-12
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+1
-1
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+1
-1
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+1
-1
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+1
-1
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+7
-4
mace/kernels/opencl/cl/buffer_to_image.cl
mace/kernels/opencl/cl/buffer_to_image.cl
+19
-10
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+1
-2
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+6
-7
未找到文件。
mace/core/allocator.h
浏览文件 @
bcbcbf7a
...
...
@@ -26,17 +26,17 @@ class Allocator {
public:
Allocator
()
{}
virtual
~
Allocator
()
noexcept
{}
virtual
void
*
New
(
size_t
nbytes
)
=
0
;
virtual
void
*
New
(
size_t
nbytes
)
const
=
0
;
virtual
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
=
0
;
virtual
void
Delete
(
void
*
data
)
=
0
;
virtual
void
DeleteImage
(
void
*
data
)
=
0
;
virtual
void
*
Map
(
void
*
buffer
,
size_t
nbytes
)
=
0
;
const
DataType
dt
)
const
=
0
;
virtual
void
Delete
(
void
*
data
)
const
=
0
;
virtual
void
DeleteImage
(
void
*
data
)
const
=
0
;
virtual
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
=
0
;
virtual
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
&
mapped_image_pitch
)
=
0
;
virtual
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
=
0
;
virtual
bool
OnHost
()
=
0
;
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
=
0
;
virtual
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
=
0
;
virtual
bool
OnHost
()
const
=
0
;
template
<
typename
T
>
T
*
New
(
size_t
num_elements
)
{
...
...
@@ -52,7 +52,7 @@ class Allocator {
class
CPUAllocator
:
public
Allocator
{
public:
~
CPUAllocator
()
override
{}
void
*
New
(
size_t
nbytes
)
override
{
void
*
New
(
size_t
nbytes
)
const
override
{
VLOG
(
3
)
<<
"Allocate CPU buffer: "
<<
nbytes
;
void
*
data
=
nullptr
;
#ifdef __ANDROID__
...
...
@@ -67,27 +67,29 @@ class CPUAllocator : public Allocator {
}
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
shape
,
const
DataType
dt
)
override
{
const
DataType
dt
)
const
override
{
LOG
(
FATAL
)
<<
"Allocate CPU image"
;
return
nullptr
;
}
void
Delete
(
void
*
data
)
override
{
void
Delete
(
void
*
data
)
const
override
{
VLOG
(
3
)
<<
"Free CPU buffer"
;
free
(
data
);
}
void
DeleteImage
(
void
*
data
)
override
{
void
DeleteImage
(
void
*
data
)
const
override
{
LOG
(
FATAL
)
<<
"Free CPU image"
;
free
(
data
);
};
void
*
Map
(
void
*
buffer
,
size_t
nbytes
)
override
{
return
buffer
;
}
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
override
{
return
(
char
*
)
buffer
+
offset
;
}
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
&
mapped_image_pitch
)
override
{
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
override
{
return
buffer
;
}
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
override
{}
bool
OnHost
()
override
{
return
true
;
}
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
override
{}
bool
OnHost
()
const
override
{
return
true
;
}
};
std
::
map
<
int32_t
,
Allocator
*>
*
gAllocatorRegistry
();
...
...
mace/core/buffer.h
0 → 100644
浏览文件 @
bcbcbf7a
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_BUFFER_H_
#define MACE_CORE_BUFFER_H_
#include "mace/core/types.h"
#include "mace/core/allocator.h"
#include <vector>
namespace
mace
{
class
BufferBase
{
public:
BufferBase
()
:
size_
(
0
)
{}
BufferBase
(
index_t
size
)
:
size_
(
size
)
{}
virtual
~
BufferBase
()
{}
virtual
void
*
buffer
()
=
0
;
virtual
const
void
*
raw_data
()
const
=
0
;
virtual
void
*
raw_mutable_data
()
=
0
;
virtual
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
=
0
;
virtual
void
UnMap
(
void
*
mapped_ptr
)
const
=
0
;
virtual
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
=
0
;
virtual
void
UnMap
()
=
0
;
virtual
void
Resize
(
index_t
size
)
=
0
;
virtual
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
=
0
;
virtual
bool
OnHost
()
const
=
0
;
virtual
index_t
offset
()
const
{
return
0
;
};
template
<
typename
T
>
const
T
*
data
()
const
{
return
reinterpret_cast
<
const
T
*>
(
raw_data
());
}
template
<
typename
T
>
T
*
mutable_data
()
{
return
reinterpret_cast
<
T
*>
(
raw_mutable_data
());
}
index_t
size
()
const
{
return
size_
;
}
protected:
index_t
size_
;
};
class
Buffer
:
public
BufferBase
{
public:
Buffer
(
Allocator
*
allocator
)
:
BufferBase
(
0
),
allocator_
(
allocator
),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{}
Buffer
(
Allocator
*
allocator
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{
buf_
=
allocator
->
New
(
size
);
}
Buffer
(
Allocator
*
allocator
,
void
*
data
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
buf_
(
data
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
false
)
{}
virtual
~
Buffer
()
{
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
}
if
(
is_data_owner_
&&
buf_
!=
nullptr
)
{
allocator_
->
Delete
(
buf_
);
}
}
void
*
buffer
()
{
MACE_CHECK_NOTNULL
(
buf_
);
return
buf_
;
};
const
void
*
raw_data
()
const
{
if
(
OnHost
())
{
MACE_CHECK_NOTNULL
(
buf_
);
return
buf_
;
}
else
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
}
}
void
*
raw_mutable_data
()
{
if
(
OnHost
())
{
MACE_CHECK_NOTNULL
(
buf_
);
return
buf_
;
}
else
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
}
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_CHECK_NOTNULL
(
buf_
);
return
allocator_
->
Map
(
buf_
,
offset
,
length
);
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_CHECK_NOTNULL
(
buf_
);
MACE_CHECK_NOTNULL
(
mapped_ptr
);
allocator_
->
Unmap
(
buf_
,
mapped_ptr
);
}
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
{
MACE_CHECK
(
mapped_buf_
==
nullptr
,
"buf has been already mapped"
);
mapped_buf_
=
Map
(
0
,
size_
,
pitch
);
};
void
UnMap
()
{
UnMap
(
mapped_buf_
);
mapped_buf_
=
nullptr
;
}
void
Resize
(
index_t
size
)
{
MACE_CHECK
(
is_data_owner_
,
"data is not owned by this buffer, cannot resize"
);
if
(
size
!=
size_
)
{
if
(
buf_
!=
nullptr
)
{
allocator_
->
Delete
(
buf_
);
}
size_
=
size
;
buf_
=
allocator_
->
New
(
size
);
}
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
MACE_CHECK
(
length
<=
size_
,
"out of buffer"
);
memcpy
(
mapped_buf_
,
(
char
*
)
src
+
offset
,
length
);
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
private:
Allocator
*
allocator_
;
void
*
buf_
;
void
*
mapped_buf_
;
bool
is_data_owner_
;
DISABLE_COPY_AND_ASSIGN
(
Buffer
);
};
class
Image
:
public
BufferBase
{
public:
Image
()
:
BufferBase
(
0
),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
)
{}
Image
(
std
::
vector
<
size_t
>
shape
,
DataType
data_type
)
:
BufferBase
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
())
*
GetEnumTypeSize
(
data_type
)),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
mapped_buf_
(
nullptr
)
{
shape_
=
shape
;
data_type_
=
data_type
;
buf_
=
allocator_
->
NewImage
(
shape
,
data_type
);
}
virtual
~
Image
()
{
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
}
if
(
buf_
!=
nullptr
)
{
allocator_
->
DeleteImage
(
buf_
);
}
}
void
*
buffer
()
{
MACE_CHECK_NOTNULL
(
buf_
);
return
buf_
;
};
const
void
*
raw_data
()
const
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
}
void
*
raw_mutable_data
()
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
}
std
::
vector
<
size_t
>
image_shape
()
const
{
return
shape_
;
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_CHECK_NOTNULL
(
buf_
);
MACE_CHECK_NOTNULL
(
mapped_ptr
);
allocator_
->
Unmap
(
buf_
,
mapped_ptr
);
}
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
{
MACE_CHECK_NOTNULL
(
buf_
);
MACE_CHECK
(
mapped_buf_
==
nullptr
,
"buf has been already mapped"
);
MACE_CHECK_NOTNULL
(
pitch
);
mapped_buf_
=
allocator_
->
MapImage
(
buf_
,
shape_
,
pitch
);
};
void
UnMap
()
{
UnMap
(
mapped_buf_
);
mapped_buf_
=
nullptr
;
};
void
Resize
(
index_t
size
)
{}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
private:
Allocator
*
allocator_
;
std
::
vector
<
size_t
>
shape_
;
DataType
data_type_
;
void
*
buf_
;
void
*
mapped_buf_
;
DISABLE_COPY_AND_ASSIGN
(
Image
);
};
class
BufferSlice
:
public
BufferBase
{
public:
BufferSlice
()
{}
BufferSlice
(
BufferBase
*
buffer
,
index_t
offset
,
index_t
length
)
:
BufferBase
(
buffer
->
size
()),
buffer_
(
buffer
),
mapped_buf_
(
nullptr
),
offset_
(
offset
),
length_
(
length
)
{
MACE_CHECK
(
offset
>=
0
,
"buffer slice offset should >= 0"
);
MACE_CHECK
(
offset
+
length
<=
size_
,
"buffer slice offset + length ("
,
offset
,
" + "
,
length
,
") should <= "
,
size_
);
}
BufferSlice
(
const
BufferSlice
&
other
)
:
BufferSlice
(
other
.
buffer_
,
other
.
offset_
,
other
.
length_
)
{}
~
BufferSlice
()
{
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
}
}
void
*
buffer
()
{
return
buffer_
->
buffer
();
};
const
void
*
raw_data
()
const
{
if
(
OnHost
())
{
MACE_CHECK_NOTNULL
(
buffer_
);
return
(
char
*
)
buffer_
->
raw_data
()
+
offset_
;
}
else
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
}
}
void
*
raw_mutable_data
()
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_NOT_IMPLEMENTED
;
}
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
{
MACE_CHECK_NOTNULL
(
buffer_
);
MACE_CHECK
(
mapped_buf_
==
nullptr
,
"mapped buf is not null"
);
mapped_buf_
=
buffer_
->
Map
(
offset_
,
length_
,
pitch
);
};
void
UnMap
()
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
buffer_
->
UnMap
(
mapped_buf_
);
mapped_buf_
=
nullptr
;
};
void
Resize
(
index_t
size
)
{
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
index_t
offset
()
const
{
return
offset_
;
}
bool
OnHost
()
const
{
return
buffer_
->
OnHost
();
}
private:
BufferBase
*
buffer_
;
void
*
mapped_buf_
;
index_t
offset_
;
index_t
length_
;
};
}
#endif // MACE_CORE_BUFFER_H_
mace/core/mace.cc
浏览文件 @
bcbcbf7a
...
...
@@ -542,7 +542,6 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
ws_
->
RemoveUnsedTensor
();
net_
=
std
::
move
(
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
));
}
}
...
...
mace/core/net.cc
浏览文件 @
bcbcbf7a
...
...
@@ -15,14 +15,14 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
DeviceType
type
)
:
op_registry_
(
op_registry
),
name_
(
net_def
->
name
())
{}
S
impleNet
::
Simple
Net
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
S
erialNet
::
Serial
Net
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
:
NetBase
(
op_registry
,
net_def
,
ws
,
type
),
device_type_
(
type
)
{
MACE_LATENCY_LOGGER
(
1
,
"Constructing S
imple
Net "
,
net_def
->
name
());
MACE_LATENCY_LOGGER
(
1
,
"Constructing S
erial
Net "
,
net_def
->
name
());
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
VLOG
(
3
)
<<
"Creating operator "
<<
operator_def
.
name
()
<<
"("
...
...
@@ -36,7 +36,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const OperatorRegistry> op_registry,
}
}
bool
S
imple
Net
::
Run
(
RunMetadata
*
run_metadata
)
{
bool
S
erial
Net
::
Run
(
RunMetadata
*
run_metadata
)
{
MACE_MEMORY_LOGGING_GUARD
();
MACE_LATENCY_LOGGER
(
1
,
"Running net"
);
for
(
auto
iter
=
operators_
.
begin
();
iter
!=
operators_
.
end
();
++
iter
)
{
...
...
@@ -99,7 +99,7 @@ std::unique_ptr<NetBase> CreateNet(
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
std
::
unique_ptr
<
NetBase
>
net
(
new
S
imple
Net
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
std
::
unique_ptr
<
NetBase
>
net
(
new
S
erial
Net
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
return
net
;
}
...
...
mace/core/net.h
浏览文件 @
bcbcbf7a
...
...
@@ -33,9 +33,9 @@ class NetBase {
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
};
class
S
imple
Net
:
public
NetBase
{
class
S
erial
Net
:
public
NetBase
{
public:
S
imple
Net
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
S
erial
Net
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
...
...
@@ -47,7 +47,7 @@ class SimpleNet : public NetBase {
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>
>
operators_
;
DeviceType
device_type_
;
DISABLE_COPY_AND_ASSIGN
(
S
imple
Net
);
DISABLE_COPY_AND_ASSIGN
(
S
erial
Net
);
};
std
::
unique_ptr
<
NetBase
>
CreateNet
(
...
...
mace/core/preallocated_pooled_allocator.h
浏览文件 @
bcbcbf7a
...
...
@@ -5,6 +5,7 @@
#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#include <unordered_map>
#include "mace/core/allocator.h"
namespace
mace
{
...
...
@@ -13,17 +14,26 @@ class PreallocatedPooledAllocator {
public:
PreallocatedPooledAllocator
()
{}
virtual
~
PreallocatedPooledAllocator
()
noexcept
{}
~
PreallocatedPooledAllocator
()
noexcept
{}
v
irtual
void
PreallocateImage
(
int
mem_id
,
const
std
::
vector
<
size_t
>
&
image_shape
,
DataType
data_type
)
=
0
;
v
oid
SetBuffer
(
int
mem_id
,
std
::
unique_ptr
<
BufferBase
>
&&
buffer
)
{
buffers_
[
mem_id
]
=
std
::
move
(
buffer
);
}
virtual
void
*
GetImage
(
int
mem_id
)
=
0
;
BufferBase
*
GetBuffer
(
int
mem_id
)
{
if
(
buffers_
.
find
(
mem_id
)
!=
buffers_
.
end
())
{
return
buffers_
[
mem_id
].
get
();
}
else
{
return
nullptr
;
}
}
virtual
bool
HasImage
(
int
mem_id
)
=
0
;
virtual
bool
HasBuffer
(
int
mem_id
)
{
return
buffers_
.
find
(
mem_id
)
!=
buffers_
.
end
();
}
virtual
std
::
vector
<
size_t
>
GetImageSize
(
int
mem_id
)
=
0
;
private:
std
::
unordered_map
<
int
,
std
::
unique_ptr
<
BufferBase
>>
buffers_
;
};
}
// namespace mace
...
...
mace/core/runtime/hexagon/hexagon_control_wrapper.h
浏览文件 @
bcbcbf7a
...
...
@@ -11,7 +11,6 @@
#include "mace/core/runtime/hexagon/quantize.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/core/serializer.h"
namespace
mace
{
...
...
@@ -37,7 +36,6 @@ class HexagonControlWrapper {
void
SetGraphMode
(
int
mode
);
private:
// CAVEAT: Need offset as HVX library reserves some ids
static
constexpr
int
NODE_ID_OFFSET
=
10000
;
inline
uint32_t
node_id
(
uint32_t
nodeid
)
{
...
...
@@ -45,7 +43,6 @@ class HexagonControlWrapper {
}
int
nn_id_
;
Serializer
serializer_
;
Quantizer
quantizer_
;
std
::
vector
<
std
::
vector
<
index_t
>>
input_shapes_
;
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
bcbcbf7a
...
...
@@ -35,7 +35,7 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
OpenCLAllocator
::
OpenCLAllocator
()
{}
OpenCLAllocator
::~
OpenCLAllocator
()
{}
void
*
OpenCLAllocator
::
New
(
size_t
nbytes
)
{
void
*
OpenCLAllocator
::
New
(
size_t
nbytes
)
const
{
VLOG
(
3
)
<<
"Allocate OpenCL buffer: "
<<
nbytes
;
cl_int
error
;
cl
::
Buffer
*
buffer
=
new
cl
::
Buffer
(
OpenCLRuntime
::
Global
()
->
context
(),
...
...
@@ -47,7 +47,7 @@ void *OpenCLAllocator::New(size_t nbytes) {
}
void
*
OpenCLAllocator
::
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
{
const
DataType
dt
)
const
{
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Image shape's size must equal 2"
;
VLOG
(
3
)
<<
"Allocate OpenCL image: "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
];
...
...
@@ -67,7 +67,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
return
cl_image
;
}
void
OpenCLAllocator
::
Delete
(
void
*
buffer
)
{
void
OpenCLAllocator
::
Delete
(
void
*
buffer
)
const
{
VLOG
(
3
)
<<
"Free OpenCL buffer"
;
if
(
buffer
!=
nullptr
)
{
cl
::
Buffer
*
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
...
...
@@ -75,7 +75,7 @@ void OpenCLAllocator::Delete(void *buffer) {
}
}
void
OpenCLAllocator
::
DeleteImage
(
void
*
buffer
)
{
void
OpenCLAllocator
::
DeleteImage
(
void
*
buffer
)
const
{
VLOG
(
3
)
<<
"Free OpenCL image"
;
if
(
buffer
!=
nullptr
)
{
cl
::
Image2D
*
cl_image
=
static_cast
<
cl
::
Image2D
*>
(
buffer
);
...
...
@@ -83,13 +83,13 @@ void OpenCLAllocator::DeleteImage(void *buffer) {
}
}
void
*
OpenCLAllocator
::
Map
(
void
*
buffer
,
size_t
nbytes
)
{
void
*
OpenCLAllocator
::
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
{
auto
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
auto
queue
=
OpenCLRuntime
::
Global
()
->
command_queue
();
// TODO(heliangliang) Non-blocking call
cl_int
error
;
void
*
mapped_ptr
=
queue
.
enqueueMapBuffer
(
*
cl_buffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
queue
.
enqueueMapBuffer
(
*
cl_buffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
offset
,
nbytes
,
nullptr
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
return
mapped_ptr
;
...
...
@@ -98,33 +98,33 @@ void *OpenCLAllocator::Map(void *buffer, size_t nbytes) {
// TODO : there is something wrong with half type.
void
*
OpenCLAllocator
::
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
&
mapped_image_pitch
)
{
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
{
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Just support map 2d image"
;
auto
cl_image
=
static_cast
<
cl
::
Image2D
*>
(
buffer
);
std
::
array
<
size_t
,
3
>
origin
=
{
0
,
0
,
0
};
std
::
array
<
size_t
,
3
>
region
=
{
image_shape
[
0
],
image_shape
[
1
],
1
};
mapped_image_pitch
.
resize
(
2
);
mapped_image_pitch
->
resize
(
2
);
cl_int
error
;
void
*
mapped_ptr
=
OpenCLRuntime
::
Global
()
->
command_queue
().
enqueueMapImage
(
*
cl_image
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
origin
,
region
,
&
mapped_image_pitch
[
0
]
,
&
mapped_image_pitch
[
1
]
,
mapped_image_pitch
->
data
()
,
mapped_image_pitch
->
data
()
+
1
,
nullptr
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
error
;
return
mapped_ptr
;
}
void
OpenCLAllocator
::
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
{
void
OpenCLAllocator
::
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
const
{
auto
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
auto
queue
=
OpenCLRuntime
::
Global
()
->
command_queue
();
MACE_CHECK
(
queue
.
enqueueUnmapMemObject
(
*
cl_buffer
,
mapped_ptr
,
nullptr
,
nullptr
)
==
CL_SUCCESS
);
}
bool
OpenCLAllocator
::
OnHost
()
{
return
false
;
}
bool
OpenCLAllocator
::
OnHost
()
const
{
return
false
;
}
}
// namespace mace
mace/core/runtime/opencl/opencl_allocator.h
浏览文件 @
bcbcbf7a
...
...
@@ -15,7 +15,7 @@ class OpenCLAllocator : public Allocator {
~
OpenCLAllocator
()
override
;
void
*
New
(
size_t
nbytes
)
override
;
void
*
New
(
size_t
nbytes
)
const
override
;
/*
* Use Image2D with RGBA (128-bit) format to represent the image.
...
...
@@ -23,21 +23,21 @@ class OpenCLAllocator : public Allocator {
* @ shape : [depth, ..., height, width ].
*/
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
override
;
const
DataType
dt
)
const
override
;
void
Delete
(
void
*
buffer
)
override
;
void
Delete
(
void
*
buffer
)
const
override
;
void
DeleteImage
(
void
*
buffer
)
override
;
void
DeleteImage
(
void
*
buffer
)
const
override
;
void
*
Map
(
void
*
buffer
,
size_t
nbytes
)
override
;
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
override
;
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
&
mapped_image_pitch
)
override
;
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
override
;
void
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
override
;
void
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
const
override
;
bool
OnHost
()
override
;
bool
OnHost
()
const
override
;
};
}
// namespace mace
...
...
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
已删除
100644 → 0
浏览文件 @
d548b461
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace
mace
{
OpenCLPreallocatedPooledAllocator
::
OpenCLPreallocatedPooledAllocator
()
:
allocator
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
))
{
}
OpenCLPreallocatedPooledAllocator
::~
OpenCLPreallocatedPooledAllocator
()
{
}
void
OpenCLPreallocatedPooledAllocator
::
PreallocateImage
(
int
mem_id
,
const
std
::
vector
<
size_t
>
&
image_shape
,
DataType
data_type
)
{
MACE_CHECK
(
!
this
->
HasImage
(
mem_id
),
"Memory already exists: "
,
mem_id
);
VLOG
(
2
)
<<
"Preallocate OpenCL image: "
<<
mem_id
<<
" "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
];
images_
[
mem_id
]
=
std
::
move
(
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
(
allocator
->
NewImage
(
image_shape
,
data_type
),
[
this
](
void
*
p
)
{
this
->
allocator
->
DeleteImage
(
p
);
}));
image_shapes_
[
mem_id
]
=
image_shape
;
}
}
// namespace mace
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
已删除
100644 → 0
浏览文件 @
d548b461
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/preallocated_pooled_allocator.h"
#include <unordered_map>
namespace
mace
{
class
OpenCLPreallocatedPooledAllocator
:
public
PreallocatedPooledAllocator
{
public:
OpenCLPreallocatedPooledAllocator
();
~
OpenCLPreallocatedPooledAllocator
()
override
;
void
PreallocateImage
(
int
mem_id
,
const
std
::
vector
<
size_t
>
&
image_shape
,
DataType
data_type
)
override
;
inline
void
*
GetImage
(
int
mem_id
)
override
{
MACE_CHECK
(
HasImage
(
mem_id
),
"image does not exist"
);
return
images_
[
mem_id
].
get
();
}
inline
bool
HasImage
(
int
mem_id
)
override
{
return
images_
.
find
(
mem_id
)
!=
images_
.
end
();
}
inline
std
::
vector
<
size_t
>
GetImageSize
(
int
mem_id
)
override
{
return
image_shapes_
[
mem_id
];
}
private:
std
::
unordered_map
<
int
,
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>>
images_
;
std
::
unordered_map
<
int
,
std
::
vector
<
size_t
>>
image_shapes_
;
Allocator
*
allocator
;
};
}
// namepsace mace
#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
mace/core/serializer.cc
已删除
100644 → 0
浏览文件 @
d548b461
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/serializer.h"
namespace
mace
{
std
::
unique_ptr
<
ConstTensor
>
Serializer
::
Serialize
(
const
Tensor
&
tensor
,
const
std
::
string
&
name
)
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
std
::
unique_ptr
<
Tensor
>
Serializer
::
Deserialize
(
const
ConstTensor
&
const_tensor
,
DeviceType
type
)
{
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
const_tensor
.
data_type
()));
std
::
vector
<
index_t
>
dims
;
for
(
const
index_t
d
:
const_tensor
.
dims
())
{
dims
.
push_back
(
d
);
}
tensor
->
Resize
(
dims
);
switch
(
const_tensor
.
data_type
())
{
case
DT_HALF
:
tensor
->
Copy
<
half
>
(
reinterpret_cast
<
const
half
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_FLOAT
:
tensor
->
Copy
<
float
>
(
reinterpret_cast
<
const
float
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_DOUBLE
:
tensor
->
Copy
<
double
>
(
reinterpret_cast
<
const
double
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_INT32
:
tensor
->
Copy
<
int32_t
>
(
reinterpret_cast
<
const
int32_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_INT64
:
tensor
->
Copy
<
int64_t
>
(
reinterpret_cast
<
const
int64_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_UINT8
:
tensor
->
Copy
<
uint8_t
>
(
reinterpret_cast
<
const
uint8_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_INT16
:
tensor
->
CopyWithCast
<
int32_t
,
uint16_t
>
(
reinterpret_cast
<
const
int32_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_INT8
:
tensor
->
CopyWithCast
<
int32_t
,
int8_t
>
(
reinterpret_cast
<
const
int32_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_UINT16
:
tensor
->
CopyWithCast
<
int32_t
,
int16_t
>
(
reinterpret_cast
<
const
int32_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
case
DT_BOOL
:
tensor
->
CopyWithCast
<
int32_t
,
bool
>
(
reinterpret_cast
<
const
int32_t
*>
(
const_tensor
.
data
()),
const_tensor
.
data_size
());
break
;
default:
MACE_NOT_IMPLEMENTED
;
break
;
}
return
tensor
;
}
}
// namespace mace
mace/core/serializer.h
已删除
100644 → 0
浏览文件 @
d548b461
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_SERIALIZER_H_
#define MACE_CORE_SERIALIZER_H_
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
namespace
mace
{
class
Serializer
{
public:
Serializer
()
{}
~
Serializer
()
{}
std
::
unique_ptr
<
ConstTensor
>
Serialize
(
const
Tensor
&
tensor
,
const
std
::
string
&
name
);
std
::
unique_ptr
<
Tensor
>
Deserialize
(
const
ConstTensor
&
const_tensor
,
DeviceType
type
);
DISABLE_COPY_AND_ASSIGN
(
Serializer
);
};
}
// namespace mace
#endif // MACE_CORE_SERIALIZER_H_
mace/core/tensor.h
浏览文件 @
bcbcbf7a
...
...
@@ -5,7 +5,7 @@
#ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_
#include "mace/core/
allocato
r.h"
#include "mace/core/
buffe
r.h"
#include "mace/utils/logging.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
...
...
@@ -46,7 +46,6 @@ namespace mace {
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
, LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;)
namespace
numerical_chars
{
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
char
c
)
{
return
std
::
is_signed
<
char
>::
value
?
os
<<
static_cast
<
int
>
(
c
)
...
...
@@ -64,27 +63,30 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
class
Tensor
{
public:
Tensor
()
:
alloc_
(
GetDeviceAllocator
(
DeviceType
::
CPU
)),
size_
(
0
),
dtype_
(
DT_FLOAT
),
buffer_
(
nullptr
),
data_
(
nullptr
),
unused_
(
false
),
is_image_
(
false
){
};
Tensor
(
Allocator
*
alloc
,
DataType
type
)
:
alloc_
(
alloc
),
size_
(
0
),
:
allocator_
(
alloc
),
dtype_
(
type
),
buffer_
(
nullptr
),
data_
(
nullptr
),
unused_
(
false
),
is_image_
(
false
){
};
is_buffer_owner_
(
true
)
{};
Tensor
(
BufferBase
*
buffer
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_
(
buffer
),
is_buffer_owner_
(
false
)
{}
Tensor
(
const
BufferSlice
&
buffer_slice
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_slice_
(
buffer_slice
),
is_buffer_owner_
(
false
)
{
buffer_
=
&
buffer_slice_
;
}
Tensor
()
:
Tensor
(
GetDeviceAllocator
(
CPU
),
DT_FLOAT
)
{}
~
Tensor
()
{
if
(
is_buffer_owner_
&&
buffer_
!=
nullptr
)
{
delete
buffer_
;
}
}
inline
DataType
dtype
()
const
{
return
dtype_
;
}
...
...
@@ -93,10 +95,6 @@ class Tensor {
inline
const
std
::
vector
<
index_t
>
&
shape
()
const
{
return
shape_
;
}
inline
const
std
::
vector
<
size_t
>
&
image_shape
()
const
{
return
image_shape_
;
}
inline
const
bool
is_image
()
const
{
return
is_image_
;
}
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim
(
unsigned
int
index
)
const
{
...
...
@@ -105,170 +103,105 @@ class Tensor {
return
shape_
[
index
];
}
inline
index_t
size
()
const
{
return
size_
;
}
inline
index_t
raw_size
()
const
{
return
size_
*
SizeOfType
();
}
inline
const
bool
unused
()
const
{
return
unused_
;
}
inline
int64_t
NumElements
()
const
{
inline
index_t
size
()
const
{
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
}
inline
const
bool
OnHost
()
const
{
return
alloc_
->
OnHost
();
}
/*
* Map the device buffer as CPU buffer to access the data, unmap must be
* called later
*/
inline
void
Map
()
const
{
if
(
!
OnHost
())
{
MACE_CHECK
(
buffer_
!=
nullptr
&&
data_
==
nullptr
);
data_
=
alloc_
->
Map
(
buffer_
.
get
(),
size_
*
SizeOfType
());
}
inline
index_t
raw_size
()
const
{
return
size
()
*
SizeOfType
();
}
inline
void
MapImage
(
std
::
vector
<
size_t
>
&
mapped_image_pitch
)
const
{
MACE_CHECK
(
!
OnHost
()
&&
buffer_
!=
nullptr
&&
data_
==
nullptr
);
data_
=
alloc_
->
MapImage
(
buffer_
.
get
(),
image_shape_
,
mapped_image_pitch
);
inline
void
*
buffer
()
const
{
MACE_CHECK
(
buffer_
!=
nullptr
&&
buffer_
->
buffer
()
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
buffer
();
}
/*
* Unmap the device buffer
*/
inline
void
Unmap
()
const
{
if
(
!
OnHost
())
{
MACE_CHECK
(
buffer_
!=
nullptr
&&
data_
!=
nullptr
);
alloc_
->
Unmap
(
buffer_
.
get
(),
data_
);
data_
=
nullptr
;
inline
index_t
buffer_offset
()
const
{
return
buffer_
->
offset
();
}
}
void
*
buffer
()
const
{
return
buffer_
.
get
();
}
inline
const
void
*
raw_data
()
const
{
void
*
data
=
MappedBuffer
();
MACE_CHECK
(
data
!=
nullptr
||
size_
==
0
,
"The tensor is of non-zero shape, but its data is not allocated "
"or mapped yet."
);
return
data
;
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
raw_data
();
}
template
<
typename
T
>
template
<
typename
T
>
inline
const
T
*
data
()
const
{
return
static_cast
<
const
T
*>
(
raw_data
());
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
data
<
T
>
();
}
inline
void
*
raw_mutable_data
()
{
void
*
data
=
MappedBuffer
();
MACE_CHECK
(
data
!=
nullptr
||
size_
==
0
,
"The tensor is of non-zero shape, but its data is not allocated "
"or mapped yet."
);
return
data
;
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
raw_mutable_data
();
}
template
<
typename
T
>
template
<
typename
T
>
inline
T
*
mutable_data
()
{
return
static_cast
<
T
*>
(
raw_mutable_data
());
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
static_cast
<
T
*>
(
buffer_
->
raw_mutable_data
());
}
inline
void
Resize
(
const
std
::
vector
<
index_t
>
&
shape
)
{
MACE_CHECK
(
!
is_image_
||
buffer_
==
nullptr
,
"Resize is not for image, use ResizeImage instead."
);
is_image_
=
false
;
inline
void
Reshape
(
const
std
::
vector
<
index_t
>
&
shape
)
{
shape_
=
shape
;
index_t
size
=
NumElements
();
if
(
size_
!=
size
)
{
size_
=
size
;
MACE_CHECK
(
data_
==
nullptr
,
"Buffer must be unmapped before resize"
);
CASES
(
dtype_
,
(
buffer_
=
std
::
move
(
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
(
alloc_
->
New
(
size_
*
sizeof
(
T
)),
[
this
](
void
*
p
)
{
this
->
alloc_
->
Delete
(
p
);
})
)));
}
MACE_CHECK
(
raw_size
()
<=
buffer_
->
size
());
}
inline
void
ResizeImage
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
size_t
>
&
image_shape
)
{
MACE_CHECK
(
is_image_
||
buffer_
==
nullptr
,
"ResizeImage is not for buffer, use Resize instead."
);
is_image_
=
true
;
inline
void
Resize
(
const
std
::
vector
<
index_t
>
&
shape
)
{
shape_
=
shape
;
index_t
size
=
NumElements
();
if
(
size_
!=
size
)
{
size_
=
size
;
image_shape_
=
image_shape
;
if
(
!
preallocated_image_shape_
.
empty
())
{
MACE_CHECK
(
preallocated_image_shape_
[
0
]
>=
image_shape
[
0
]
&&
preallocated_image_shape_
[
1
]
>=
image_shape
[
1
],
"image shape not large enough: preallocated "
,
preallocated_image_shape_
[
0
],
" "
,
preallocated_image_shape_
[
1
],
"apply for "
,
image_shape
[
0
],
" "
,
image_shape
[
1
]);
if
(
buffer_
!=
nullptr
)
{
buffer_
->
Resize
(
raw_size
());
}
else
{
buffer_
=
std
::
move
(
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
(
alloc_
->
NewImage
(
image_shape
,
dtype_
),
[
this
](
void
*
p
)
{
this
->
alloc_
->
DeleteImage
(
p
);
}));
preallocated_image_shape_
=
image_shape
;
}
buffer_
=
new
Buffer
(
allocator_
,
raw_size
());
is_buffer_owner_
=
true
;
}
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
if
(
other
.
is_image
())
{
ResizeImage
(
other
.
shape
(),
other
.
image_shape
());
}
else
{
Resize
(
other
.
shape
());
}
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
if
(
other
->
is_image
())
{
ResizeImage
(
other
->
shape
(),
other
->
image_shape
());
}
else
{
Resize
(
other
->
shape
());
}
inline
void
ResizeImage
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
size_t
>
&
image_shape
)
{
shape_
=
shape
;
if
(
buffer_
==
nullptr
)
{
buffer_
=
new
Image
(
image_shape
,
dtype_
);
is_buffer_owner_
=
true
;
}
}
inline
void
PreallocateImage
(
void
*
image
,
const
std
::
vector
<
size_t
>&
image_shape
)
{
is_image_
=
true
;
buffer_
=
std
::
move
(
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
(
image
,
[](
void
*
p
)
{
// tensor does not have ownership of preallocated memory
}));
preallocated_image_shape_
=
image_shape
;
inline
void
CopyBytes
(
const
void
*
src
,
size_t
size
)
{
MappingGuard
guard
(
this
);
memcpy
(
buffer_
->
raw_mutable_data
(),
src
,
size
);
}
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
size
)
{
MACE_CHECK
(
size
==
size_
,
"copy src and dst with different size."
);
CopyBytes
(
static_cast
<
const
void
*>
(
src
),
sizeof
(
T
)
*
size
);
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
length
)
{
MACE_CHECK
(
length
==
size
()
,
"copy src and dst with different size."
);
CopyBytes
(
static_cast
<
const
void
*>
(
src
),
sizeof
(
T
)
*
length
);
}
template
<
typename
SrcType
,
typename
DstType
>
inline
void
CopyWithCast
(
const
SrcType
*
src
,
size_t
size
)
{
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
std
::
unique_ptr
<
DstType
[]
>
buffer
(
new
DstType
[
size
]);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
buffer
[
i
]
=
static_cast
<
DstType
>
(
src
[
i
]);
inline
void
Copy
(
const
Tensor
&
other
)
{
dtype_
=
other
.
dtype_
;
ResizeLike
(
other
);
MappingGuard
map_other
(
&
other
);
CopyBytes
(
other
.
raw_data
(),
other
.
size
()
*
SizeOfType
());
}
CopyBytes
(
static_cast
<
const
void
*>
(
buffer
.
get
()),
sizeof
(
DstType
)
*
size
);
inline
size_t
SizeOfType
()
const
{
size_t
type_size
=
0
;
CASES
(
dtype_
,
type_size
=
sizeof
(
T
));
return
type_size
;
}
inline
void
CopyBytes
(
const
void
*
src
,
size_t
size
)
{
MappingGuard
map_this
(
this
);
memcpy
(
raw_mutable_data
(),
src
,
size
);
inline
BufferBase
*
UnderlyingBuffer
()
const
{
return
buffer_
;
}
inline
void
DebugPrint
()
const
{
...
...
@@ -281,8 +214,8 @@ class Tensor {
os
.
str
(
""
);
os
.
clear
();
MappingGuard
guard
(
this
);
for
(
int
i
=
0
;
i
<
size
_
;
++
i
)
{
if
(
i
!=
0
&&
i
%
shape_
[
3
]
==
0
)
{
for
(
int
i
=
0
;
i
<
size
()
;
++
i
)
{
if
(
i
!=
0
&&
i
%
shape_
[
3
]
==
0
)
{
os
<<
"
\n
"
;
}
CASES
(
dtype_
,
(
os
<<
(
this
->
data
<
T
>
()[
i
])
<<
", "
));
...
...
@@ -291,37 +224,11 @@ class Tensor {
<<
dim
(
2
)
<<
", "
<<
dim
(
3
)
<<
"], content:
\n
"
<<
os
.
str
();
}
inline
size_t
SizeOfType
()
const
{
size_t
type_size
=
0
;
CASES
(
dtype_
,
type_size
=
sizeof
(
T
));
return
type_size
;
}
inline
void
Copy
(
const
Tensor
&
other
)
{
alloc_
=
other
.
alloc_
;
dtype_
=
other
.
dtype_
;
ResizeLike
(
other
);
MappingGuard
map_other
(
&
other
);
if
(
is_image_
)
{
LOG
(
FATAL
)
<<
"Not support copy image tensor, please use Copy API."
;
}
else
{
CopyBytes
(
other
.
raw_data
(),
size_
*
SizeOfType
());
}
}
inline
void
MarkUnused
()
{
this
->
unused_
=
true
;
}
class
MappingGuard
{
public:
MappingGuard
(
const
Tensor
*
tensor
)
:
tensor_
(
tensor
)
{
if
(
tensor_
!=
nullptr
)
{
if
(
tensor_
->
is_image
())
{
tensor_
->
MapImage
(
mapped_image_pitch_
);
}
else
{
tensor_
->
Map
();
}
tensor_
->
buffer_
->
Map
(
&
mapped_image_pitch_
);
}
}
...
...
@@ -331,10 +238,12 @@ class Tensor {
}
~
MappingGuard
()
{
if
(
tensor_
!=
nullptr
)
tensor_
->
Unm
ap
();
if
(
tensor_
!=
nullptr
)
tensor_
->
buffer_
->
UnM
ap
();
}
inline
const
std
::
vector
<
size_t
>
&
mapped_image_pitch
()
const
{
return
mapped_image_pitch_
;
}
inline
const
std
::
vector
<
size_t
>
&
mapped_image_pitch
()
const
{
return
mapped_image_pitch_
;
}
private:
const
Tensor
*
tensor_
;
...
...
@@ -344,27 +253,12 @@ class Tensor {
};
private:
inline
void
*
MappedBuffer
()
const
{
if
(
OnHost
())
{
return
buffer_
.
get
();
}
return
data_
;
}
Allocator
*
alloc_
;
index_t
size_
;
Allocator
*
allocator_
;
DataType
dtype_
;
// Raw buffer, must be mapped as host accessable data before
// read or write
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
buffer_
;
// Mapped buffer
mutable
void
*
data_
;
std
::
vector
<
index_t
>
shape_
;
// Image for opencl
bool
unused_
;
bool
is_image_
;
std
::
vector
<
size_t
>
image_shape_
;
std
::
vector
<
size_t
>
preallocated_image_shape_
;
BufferBase
*
buffer_
;
BufferSlice
buffer_slice_
;
bool
is_buffer_owner_
;
DISABLE_COPY_AND_ASSIGN
(
Tensor
);
};
...
...
mace/core/workspace.cc
浏览文件 @
bcbcbf7a
...
...
@@ -6,21 +6,11 @@
#include <vector>
#include "mace/core/workspace.h"
#include "mace/core/serializer.h"
#include "mace/core/arg_helper.h"
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
#include "mace/utils/timer.h"
namespace
mace
{
std
::
vector
<
std
::
string
>
Workspace
::
Tensors
()
const
{
std
::
vector
<
std
::
string
>
names
;
for
(
auto
&
entry
:
tensor_map_
)
{
names
.
push_back
(
entry
.
first
);
}
return
names
;
}
Tensor
*
Workspace
::
CreateTensor
(
const
std
::
string
&
name
,
Allocator
*
alloc
,
DataType
type
)
{
...
...
@@ -28,21 +18,12 @@ Tensor *Workspace::CreateTensor(const std::string &name,
VLOG
(
3
)
<<
"Tensor "
<<
name
<<
" already exists. Skipping."
;
}
else
{
VLOG
(
3
)
<<
"Creating Tensor "
<<
name
;
tensor_map_
[
name
]
=
std
::
move
(
std
::
unique_ptr
<
Tensor
>
(
new
Tensor
(
alloc
,
type
)));
tensor_map_
[
name
]
=
std
::
move
(
std
::
unique_ptr
<
Tensor
>
(
new
Tensor
(
alloc
,
type
)));
}
return
GetTensor
(
name
);
}
bool
Workspace
::
RemoveTensor
(
const
std
::
string
&
name
)
{
auto
it
=
tensor_map_
.
find
(
name
);
if
(
it
!=
tensor_map_
.
end
())
{
VLOG
(
3
)
<<
"Removing blob "
<<
name
<<
" from this workspace."
;
tensor_map_
.
erase
(
it
);
return
true
;
}
return
false
;
}
const
Tensor
*
Workspace
::
GetTensor
(
const
std
::
string
&
name
)
const
{
if
(
tensor_map_
.
count
(
name
))
{
return
tensor_map_
.
at
(
name
).
get
();
...
...
@@ -52,26 +33,51 @@ const Tensor *Workspace::GetTensor(const std::string &name) const {
return
nullptr
;
}
void
Workspace
::
RemoveUnsedTensor
()
{
auto
iter
=
tensor_map_
.
begin
();
auto
end_iter
=
tensor_map_
.
end
();
while
(
iter
!=
end_iter
)
{
auto
old_iter
=
iter
++
;
if
(
old_iter
->
second
->
unused
())
{
tensor_map_
.
erase
(
old_iter
);
}
}
}
Tensor
*
Workspace
::
GetTensor
(
const
std
::
string
&
name
)
{
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
}
std
::
vector
<
std
::
string
>
Workspace
::
Tensors
()
const
{
std
::
vector
<
std
::
string
>
names
;
for
(
auto
&
entry
:
tensor_map_
)
{
names
.
push_back
(
entry
.
first
);
}
return
names
;
}
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
MACE_LATENCY_LOGGER
(
1
,
"Load model tensors"
);
Serializer
serializer
;
index_t
model_data_size
=
0
;
unsigned
char
*
model_data_ptr
=
nullptr
;
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
if
(
model_data_ptr
==
nullptr
||
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
<
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
{
model_data_ptr
=
const_cast
<
unsigned
char
*>
(
const_tensor
.
data
());
}
}
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
model_data_size
=
std
::
max
(
model_data_size
,
static_cast
<
index_t
>
(
(
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
-
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
+
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())));
}
VLOG
(
3
)
<<
"Model data size: "
<<
model_data_size
;
if
(
type
==
DeviceType
::
CPU
)
{
tensor_buffer_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_ptr
,
model_data_size
)));
}
else
{
tensor_buffer_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_size
)));
tensor_buffer_
->
Map
(
nullptr
);
tensor_buffer_
->
Copy
(
model_data_ptr
,
0
,
model_data_size
);
tensor_buffer_
->
UnMap
();
}
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
MACE_LATENCY_LOGGER
(
2
,
"Load tensor "
,
const_tensor
.
name
());
VLOG
(
3
)
<<
"Tensor name: "
<<
const_tensor
.
name
()
...
...
@@ -79,9 +85,24 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
<<
", shape: "
<<
MakeString
(
std
::
vector
<
index_t
>
(
const_tensor
.
dims
().
begin
(),
const_tensor
.
dims
().
end
()));
tensor_map_
[
const_tensor
.
name
()]
=
serializer
.
Deserialize
(
const_tensor
,
type
);
std
::
vector
<
index_t
>
dims
;
for
(
const
index_t
d
:
const_tensor
.
dims
())
{
dims
.
push_back
(
d
);
}
index_t
offset
=
(
long
long
)
const_tensor
.
data
()
-
(
long
long
)
model_data_ptr
;
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
BufferSlice
(
tensor_buffer_
.
get
(),
offset
,
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())),
const_tensor
.
data_type
()));
tensor
->
Reshape
(
dims
);
tensor_map_
[
const_tensor
.
name
()]
=
std
::
move
(
tensor
);
}
if
(
type
==
DeviceType
::
OPENCL
)
{
CreateImageOutputTensor
(
net_def
);
}
...
...
@@ -91,9 +112,6 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
if
(
!
net_def
.
has_mem_arena
()
||
net_def
.
mem_arena
().
mem_block_size
()
==
0
)
{
return
;
}
preallocated_allocator_
=
std
::
move
(
std
::
unique_ptr
<
PreallocatedPooledAllocator
>
(
new
OpenCLPreallocatedPooledAllocator
));
DataType
dtype
=
DataType
::
DT_INVALID
;
// We use the data type of the first op (with mem id, must be image),
...
...
@@ -116,18 +134,16 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
}
MACE_CHECK
(
dtype
!=
DataType
::
DT_INVALID
,
"data type is invalid."
);
for
(
auto
&
mem_block
:
net_def
.
mem_arena
().
mem_block
())
{
preallocated_allocator_
->
PreallocateImage
(
mem_block
.
mem_id
(),
{
mem_block
.
x
(),
mem_block
.
y
()},
dtype
);
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
image_buf
)
);
}
VLOG
(
3
)
<<
"Preallocate image to tensors"
;
auto
allocator
=
GetDeviceAllocator
(
DeviceType
::
OPENCL
);
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
op
.
has_mem_id
())
{
CreateTensor
(
op
.
output
(
0
),
allocator
,
dtype
);
tensor_map_
[
op
.
output
(
0
)]
->
PreallocateImage
(
preallocated_allocator_
->
GetImage
(
op
.
mem_id
()),
preallocated_allocator_
->
GetImageSize
(
op
.
mem_id
()));
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
preallocated_allocator_
.
GetBuffer
(
op
.
mem_id
()),
dtype
));
tensor_map_
[
op
.
output
(
0
)]
=
std
::
move
(
tensor
);
}
}
}
...
...
mace/core/workspace.h
浏览文件 @
bcbcbf7a
...
...
@@ -15,26 +15,23 @@ class Workspace {
public:
typedef
std
::
map
<
std
::
string
,
std
::
unique_ptr
<
Tensor
>>
TensorMap
;
Workspace
()
:
preallocated_allocator_
(
nullptr
)
{}
Workspace
()
{}
~
Workspace
()
{}
std
::
vector
<
std
::
string
>
Tensors
()
const
;
Tensor
*
CreateTensor
(
const
std
::
string
&
name
,
Allocator
*
alloc
,
DataType
type
);
bool
RemoveTensor
(
const
std
::
string
&
name
);
void
RemoveUnsedTensor
();
Tensor
*
CreateTensor
(
const
std
::
string
&
name
,
Allocator
*
alloc
,
DataType
type
);
inline
bool
HasTensor
(
const
std
::
string
&
name
)
const
{
return
tensor_map_
.
count
(
name
);
return
tensor_map_
.
find
(
name
)
!=
tensor_map_
.
end
(
);
}
const
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
;
Tensor
*
GetTensor
(
const
std
::
string
&
name
);
std
::
vector
<
std
::
string
>
Tensors
()
const
;
void
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
);
private:
...
...
@@ -42,7 +39,9 @@ class Workspace {
TensorMap
tensor_map_
;
std
::
unique_ptr
<
PreallocatedPooledAllocator
>
preallocated_allocator_
;
std
::
unique_ptr
<
BufferBase
>
tensor_buffer_
;
PreallocatedPooledAllocator
preallocated_allocator_
;
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
};
...
...
mace/kernels/batch_norm.h
浏览文件 @
bcbcbf7a
...
...
@@ -127,7 +127,7 @@ struct BatchNormFunctor : BatchNormFunctorBase {
}
}
}
DoActivation
(
output_ptr
,
output_ptr
,
output
->
NumElements
(),
activation_
,
DoActivation
(
output_ptr
,
output_ptr
,
output
->
size
(),
activation_
,
relux_max_limit_
);
}
};
...
...
mace/kernels/conv_2d.h
浏览文件 @
bcbcbf7a
...
...
@@ -616,7 +616,7 @@ struct Conv2dFunctor : Conv2dFunctorBase {
}
}
}
DoActivation
(
output_data
,
output_data
,
output
->
NumElements
(),
activation_
,
DoActivation
(
output_data
,
output_data
,
output
->
size
(),
activation_
,
relux_max_limit_
);
}
};
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
bcbcbf7a
...
...
@@ -402,7 +402,7 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
valid_h_stop
,
valid_w_start
,
valid_w_stop
);
output_ptr
=
output
->
mutable_data
<
T
>
();
DoActivation
(
output_ptr
,
output_ptr
,
output
->
NumElements
(),
activation_
,
DoActivation
(
output_ptr
,
output_ptr
,
output
->
size
(),
activation_
,
relux_max_limit_
);
}
};
...
...
mace/kernels/fully_connected.h
浏览文件 @
bcbcbf7a
...
...
@@ -65,7 +65,7 @@ struct FullyConnectedFunctor : FullyConnectedBase {
}
}
DoActivation
(
output_ptr
,
output_ptr
,
output
->
NumElements
(),
activation_
,
DoActivation
(
output_ptr
,
output_ptr
,
output
->
size
(),
activation_
,
relux_max_limit_
);
}
};
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
bcbcbf7a
...
...
@@ -14,7 +14,6 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
const
BufferType
type
,
Tensor
*
image
,
StatsFuture
*
future
)
{
MACE_CHECK
(
!
buffer
->
is_image
())
<<
"buffer must be buffer-type"
;
std
::
vector
<
size_t
>
image_shape
;
if
(
!
i2b_
)
{
CalImage2DShape
(
buffer
->
shape
(),
type
,
image_shape
);
...
...
@@ -25,9 +24,9 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
}
else
{
image
->
ResizeImage
(
buffer
->
shape
(),
image_shape
);
}
buffer
->
MarkUnused
();
}
else
{
image_shape
=
image
->
image_shape
();
Image
*
image_buf
=
dynamic_cast
<
Image
*>
(
image
->
UnderlyingBuffer
());
image_shape
=
image_buf
->
image_shape
();
buffer
->
Resize
(
image
->
shape
());
}
...
...
@@ -78,7 +77,11 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
built_options
);
uint32_t
idx
=
0
;
b2f_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
buffer
->
buffer
())));
b2f_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
buffer
->
buffer
())));
if
(
!
i2b_
)
{
MACE_CHECK
(
buffer
->
buffer_offset
()
%
GetEnumTypeSize
(
buffer
->
dtype
())
==
0
,
"buffer offset not aligned"
);
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
buffer_offset
()
/
GetEnumTypeSize
(
buffer
->
dtype
())));
}
if
(
type
==
ARGUMENT
)
{
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
dim
(
0
)));
}
else
if
(
type
==
WEIGHT_HEIGHT
)
{
...
...
mace/kernels/opencl/cl/buffer_to_image.cl
浏览文件 @
bcbcbf7a
#
include
<common.h>
__kernel
void
filter_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
h,
w,
oc,
ic
*/
__private
const
int
input_offset,
__private
const
int
filter_w,
__private
const
int
out_channel,
__private
const
int
in_channel,
...
...
@@ -13,7 +14,7 @@ __kernel void filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w, o
const
int
in_channel_idx
=
w
%
rounded_in_channel
;
const
int
h_idx
=
hw_idx
/
filter_w
;
const
int
w_idx
=
hw_idx
%
filter_w
;
const
int
offset
=
((
h_idx
*
filter_w
+
w_idx
)
*
out_channel
+
out_channel_idx
)
*
in_channel
const
int
offset
=
input_offset
+
((
h_idx
*
filter_w
+
w_idx
)
*
out_channel
+
out_channel_idx
)
*
in_channel
+
in_channel_idx
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
values
=
0
;
...
...
@@ -79,6 +80,7 @@ __kernel void filter_image_to_buffer(__global DATA_TYPE *output, /* h, w, oc, ic
}
__kernel
void
dw_filter_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
h,
w,
ic,
m
*/
__private
const
int
input_offset,
__private
const
int
filter_w,
__private
const
int
in_channel,
__private
const
int
multiplier,
...
...
@@ -92,7 +94,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
const
int
h_idx
=
w
/
filter_w
;
const
int
w_idx
=
w
%
filter_w
;
const
int
offset
=
mad24
(
mad24
(
h_idx,
filter_w,
w_idx
)
,
const
int
offset
=
input_offset
+
mad24
(
mad24
(
h_idx,
filter_w,
w_idx
)
,
in_channel,
in_channel_idx
)
;
const
int
size
=
in_channel
-
in_channel_idx
;
...
...
@@ -117,7 +119,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
const
int
h_idx
=
hw_idx
/
filter_w
;
const
int
w_idx
=
hw_idx
%
filter_w
;
const
int
offset
=
mad24
(
mad24
(
mad24
(
h_idx,
filter_w,
w_idx
)
,
const
int
offset
=
input_offset
+
mad24
(
mad24
(
mad24
(
h_idx,
filter_w,
w_idx
)
,
in_channel,
in_channel_idx
)
,
multiplier,
m
)
;
//
TODO
support
multiplier
>
1
...
...
@@ -128,6 +130,7 @@ __kernel void dw_filter_buffer_to_image(__global const DATA_TYPE *input, /* h, w
}
__kernel
void
in_out_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
nhwc
*/
__private
const
int
input_offset,
__private
const
int
height,
__private
const
int
width,
__private
const
int
channels,
...
...
@@ -138,7 +141,7 @@ __kernel void in_out_buffer_to_image(__global const DATA_TYPE *input, /* nhwc */
const
int
height_idx
=
h
%
height
;
const
int
width_idx
=
w
%
width
;
const
int
channel_idx
=
w
/
width
*
4
;
const
int
offset
=
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
const
int
offset
=
input_offset
+
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
+
channel_idx
;
const
int
size
=
channels
-
channel_idx
;
...
...
@@ -191,13 +194,16 @@ __kernel void in_out_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
}
__kernel
void
arg_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
nhwc
*/
__private
const
int
input_offset,
__private
const
int
count,
__write_only
image2d_t
output
)
{
int
w
=
get_global_id
(
0
)
;
int
h
=
get_global_id
(
1
)
;
const
int
offset
=
w
*
4
;
const
int
size
=
count
-
offset
;
const
int
offset
=
input_offset
+
w
*
4
;
const
int
size
=
count
-
w
*
4
;
VEC_DATA_TYPE
(
DATA_TYPE,
4
)
values
=
0
;
if
(
size
<
4
)
{
switch
(
size
)
{
...
...
@@ -241,6 +247,7 @@ __kernel void arg_image_to_buffer(__global DATA_TYPE *output, /* nhwc */
__kernel
void
in_out_height_buffer_to_image
(
__global
const
DATA_TYPE
*input,
//nhwc
__private
const
int
input_offset,
__private
const
int
height,
__private
const
int
width,
__private
const
int
channels,
...
...
@@ -253,7 +260,7 @@ __kernel void in_out_height_buffer_to_image(__global const DATA_TYPE *input, //n
const
int
height_idx
=
(
h
%
height_blks
)
<<
2
;
const
int
width_idx
=
w
%
width
;
const
int
channel_idx
=
w
/
width
;
int
offset
=
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
int
offset
=
input_offset
+
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
+
channel_idx
;
int
size
=
height
-
height_idx
;
...
...
@@ -304,6 +311,7 @@ __kernel void in_out_height_image_to_buffer(__global DATA_TYPE *output, //nhwc
__kernel
void
in_out_width_buffer_to_image
(
__global
const
DATA_TYPE
*input,
/*
nhwc
*/
__private
const
int
input_offset,
__private
const
int
height,
__private
const
int
width,
__private
const
int
channels,
...
...
@@ -314,7 +322,7 @@ __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* n
const
int
height_idx
=
h
%
height
;
const
int
width_idx
=
(
w
%
width
)
<<
2
;
const
int
channel_idx
=
w
/
width
;
const
int
offset
=
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
const
int
offset
=
input_offset
+
((
batch_idx
*
height
+
height_idx
)
*
width
+
width_idx
)
*
channels
+
channel_idx
;
int
size
=
width
-
width_idx
;
...
...
@@ -336,6 +344,7 @@ __kernel void in_out_width_buffer_to_image(__global const DATA_TYPE *input, /* n
//
only
support
3x3
now
__kernel
void
winograd_filter_buffer_to_image
(
__global
const
DATA_TYPE
*input,
//Oc,
Ic,
H,
W
__private
const
int
input_offset,
__private
const
int
in_channels,
__private
const
int
height,
__private
const
int
width,
...
...
@@ -345,7 +354,7 @@ __kernel void winograd_filter_buffer_to_image(__global const DATA_TYPE *input, /
const
int
out_channels
=
get_global_size
(
1
)
;
const
int
out_channel_idx
=
h
;
const
int
in_channel_idx
=
w
<<
2
;
const
int
offset
=
(
out_channel_idx
*
in_channels
+
in_channel_idx
)
*
height
*
width
;
const
int
offset
=
input_offset
+
(
out_channel_idx
*
in_channels
+
in_channel_idx
)
*
height
*
width
;
const
int
length
=
min
((
in_channels
-
in_channel_idx
)
*
9
,
36
)
;
DATA_TYPE
in[36]
=
{0}
;
DATA_TYPE4
tt
;
...
...
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
bcbcbf7a
...
...
@@ -65,13 +65,12 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
index_t
kernel_h
=
filter
->
dim
(
0
);
index_t
kernel_w
=
filter
->
dim
(
1
);
if
(
!
input
->
is_image
()
||
strides_
[
0
]
!=
strides_
[
1
]
||
if
(
strides_
[
0
]
!=
strides_
[
1
]
||
(
dilations_
[
0
]
>
1
&&
(
strides_
[
0
]
>
1
||
kernel_h
==
1
)))
{
LOG
(
WARNING
)
<<
"OpenCL conv2d kernel with "
<<
"filter"
<<
kernel_h
<<
"x"
<<
kernel_w
<<
","
<<
" stride "
<<
strides_
[
0
]
<<
"x"
<<
strides_
[
1
]
<<
",dilations "
<<
dilations_
[
0
]
<<
"x"
<<
dilations_
[
1
]
<<
" and input image: "
<<
input
->
is_image
()
<<
" is not implemented yet."
;
MACE_NOT_IMPLEMENTED
;
}
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
bcbcbf7a
...
...
@@ -27,13 +27,12 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
if
(
kernel_
.
get
()
==
nullptr
)
{
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
if
(
input
->
is_image
())
{
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
}
else
{
output
->
Resize
(
output_shape
);
}
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录