Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
e7a89f6f
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e7a89f6f
编写于
5月 18, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add Init api in MaceEngine for return error code.
上级
9e2ab0d1
变更
40
隐藏空白更改
内联
并排
Showing
40 changed file
with
340 addition
and
175 deletion
+340
-175
docker/Dockerfile
docker/Dockerfile
+2
-1
docs/getting_started/how_to_build.rst
docs/getting_started/how_to_build.rst
+40
-24
mace/core/allocator.h
mace/core/allocator.h
+32
-20
mace/core/buffer.h
mace/core/buffer.h
+80
-33
mace/core/mace.cc
mace/core/mace.cc
+31
-21
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+28
-10
mace/core/runtime/opencl/opencl_allocator.h
mace/core/runtime/opencl/opencl_allocator.h
+4
-3
mace/core/tensor.h
mace/core/tensor.h
+18
-11
mace/core/workspace.cc
mace/core/workspace.cc
+25
-8
mace/core/workspace.h
mace/core/workspace.h
+3
-2
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+0
-3
mace/kernels/opencl/activation.cc
mace/kernels/opencl/activation.cc
+2
-1
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+2
-1
mace/kernels/opencl/batch_norm.cc
mace/kernels/opencl/batch_norm.cc
+2
-1
mace/kernels/opencl/bias_add.cc
mace/kernels/opencl/bias_add.cc
+2
-1
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+2
-1
mace/kernels/opencl/channel_shuffle.cc
mace/kernels/opencl/channel_shuffle.cc
+2
-1
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+4
-2
mace/kernels/opencl/conv_2d_1x1.cc
mace/kernels/opencl/conv_2d_1x1.cc
+2
-1
mace/kernels/opencl/conv_2d_3x3.cc
mace/kernels/opencl/conv_2d_3x3.cc
+2
-1
mace/kernels/opencl/conv_2d_general.cc
mace/kernels/opencl/conv_2d_general.cc
+2
-1
mace/kernels/opencl/deconv_2d_opencl.cc
mace/kernels/opencl/deconv_2d_opencl.cc
+2
-1
mace/kernels/opencl/depth_to_space.cc
mace/kernels/opencl/depth_to_space.cc
+2
-1
mace/kernels/opencl/depthwise_conv.cc
mace/kernels/opencl/depthwise_conv.cc
+2
-1
mace/kernels/opencl/eltwise.cc
mace/kernels/opencl/eltwise.cc
+2
-1
mace/kernels/opencl/fully_connected.cc
mace/kernels/opencl/fully_connected.cc
+4
-2
mace/kernels/opencl/image_to_buffer.cc
mace/kernels/opencl/image_to_buffer.cc
+2
-1
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+2
-1
mace/kernels/opencl/out_of_range_check_test.cc
mace/kernels/opencl/out_of_range_check_test.cc
+2
-1
mace/kernels/opencl/pad.cc
mace/kernels/opencl/pad.cc
+2
-1
mace/kernels/opencl/pooling.cc
mace/kernels/opencl/pooling.cc
+2
-1
mace/kernels/opencl/resize_bilinear.cc
mace/kernels/opencl/resize_bilinear.cc
+2
-1
mace/kernels/opencl/slice.cc
mace/kernels/opencl/slice.cc
+2
-1
mace/kernels/opencl/softmax.cc
mace/kernels/opencl/softmax.cc
+2
-1
mace/kernels/opencl/space_to_batch.cc
mace/kernels/opencl/space_to_batch.cc
+2
-1
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+4
-2
mace/public/mace.h
mace/public/mace.h
+10
-5
mace/python/tools/mace_engine_factory.h.jinja2
mace/python/tools/mace_engine_factory.h.jinja2
+5
-4
mace/test/mace_api_mt_test.cc
mace/test/mace_api_mt_test.cc
+3
-1
mace/test/mace_api_test.cc
mace/test/mace_api_test.cc
+3
-1
未找到文件。
docker/Dockerfile
浏览文件 @
e7a89f6f
...
...
@@ -110,7 +110,8 @@ RUN apt-get install -y --no-install-recommends \
# Install tools
RUN
pip
install
-i
http://pypi.douban.com/simple/
--trusted-host
pypi.douban.com setuptools
RUN
pip
install
-i
http://pypi.douban.com/simple/
--trusted-host
pypi.douban.com
tensorflow
==
1.6.0
\
RUN
pip
install
-i
http://pypi.douban.com/simple/
--trusted-host
pypi.douban.com
tensorflow
==
1.7.0
\
numpy>
=
1.14.0
\
scipy
\
jinja2
\
pyyaml
\
...
...
docs/getting_started/how_to_build.rst
浏览文件 @
e7a89f6f
...
...
@@ -33,11 +33,13 @@ How to build
+=====================+=================+===================================================================================================+
| bazel | >= 0.5.4 | - |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| android-ndk | r1
2c
| - |
| android-ndk | r1
5c,r16b
| - |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| adb | >= 1.0.32 | apt install -y android-tools-adb |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| tensorflow | 1.4.0 | pip install tensorflow==1.4.0 |
| tensorflow | 1.7.0 | pip install tensorflow==1.7.0 |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| numpy | >= 1.14.0 | pip install numpy |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| scipy | >= 1.0.0 | pip install scipy |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
...
...
@@ -45,6 +47,10 @@ How to build
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| PyYaml | >= 3.12 | pip install pyyaml |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| sh | >= 1.12.14 | pip install sh |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| filelock | >= 3.0.0 | pip install filelock |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| docker(for caffe) | >= 17.09.0-ce | `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__ |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
...
...
@@ -229,29 +235,47 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级
// 引入头文件
#include "mace/public/mace.h"
#include "mace/public/
{MODEL_TAG}
.h"
#include "mace/public/
mace_engine_factory
.h"
// 0. 设置内部存储
// 0. 设置内部存储
(设置一次即可)
const std::string file_path ="/path/to/store/internel/files";
std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory);
//1. 从文件或代码中Load模型数据,也可通过自定义的方式来Load (例如可自己实现压缩加密等)
// 如果使用的是数据嵌入的方式,将参数设为nullptr。
unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
//1. 声明设备类型(必须与build时指定的runtime一致)
DeviceType device_type = DeviceType::GPU;
//2. 创建net对象
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
//3. 声明设备类型(必须与build时指定的runtime一致)
DeviceType device_type = DeviceType::OPENCL;
//4. 定义输入输出名称数组
//2. 定义输入输出名称数组
std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...};
//5. 创建输入输出对象
//3. 创建MaceEngine对象
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
// Create Engine
if (model_data_file.empty()) {
create_engine_status =
CreateMaceEngine(model_name.c_str(),
nullptr,
input_names,
output_names,
device_type,
&engine);
} else {
create_engine_status =
CreateMaceEngine(model_name.c_str(),
model_data_file.c_str(),
input_names,
output_names,
device_type,
&engine);
}
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
// do something
}
//4. 创建输入输出对象
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) {
...
...
@@ -276,14 +300,6 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
}
//6. 创建MaceEngine对象
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
//7. 如果设备类型是OPENCL或HEXAGON,可以在此释放model_data
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
//8. 执行模型,得到结果
//5. 执行模型,得到结果
engine.Run(inputs, &outputs);
mace/core/allocator.h
浏览文件 @
e7a89f6f
...
...
@@ -16,6 +16,7 @@
#define MACE_CORE_ALLOCATOR_H_
#include <stdlib.h>
#include <string.h>
#include <map>
#include <limits>
#include <vector>
...
...
@@ -42,9 +43,10 @@ class Allocator {
public:
Allocator
()
{}
virtual
~
Allocator
()
noexcept
{}
virtual
void
*
New
(
size_t
nbytes
)
const
=
0
;
virtual
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
const
=
0
;
virtual
MaceStatus
New
(
size_t
nbytes
,
void
**
result
)
const
=
0
;
virtual
MaceStatus
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
,
void
**
result
)
const
=
0
;
virtual
void
Delete
(
void
*
data
)
const
=
0
;
virtual
void
DeleteImage
(
void
*
data
)
const
=
0
;
virtual
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
=
0
;
...
...
@@ -53,44 +55,54 @@ class Allocator {
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
=
0
;
virtual
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
=
0
;
virtual
bool
OnHost
()
const
=
0
;
template
<
typename
T
>
T
*
New
(
size_t
num_elements
)
{
if
(
num_elements
>
(
std
::
numeric_limits
<
size_t
>::
max
()
/
sizeof
(
T
)))
{
return
nullptr
;
}
void
*
p
=
New
(
sizeof
(
T
)
*
num_elements
);
T
*
typed_p
=
reinterpret_cast
<
T
*>
(
p
);
return
typed_p
;
}
};
class
CPUAllocator
:
public
Allocator
{
public:
~
CPUAllocator
()
override
{}
void
*
New
(
size_t
nbytes
)
const
override
{
MaceStatus
New
(
size_t
nbytes
,
void
**
result
)
const
override
{
VLOG
(
3
)
<<
"Allocate CPU buffer: "
<<
nbytes
;
if
(
nbytes
==
0
)
{
return
MaceStatus
::
MACE_SUCCESS
;
}
void
*
data
=
nullptr
;
#if defined(__ANDROID__) || defined(__hexagon__)
data
=
memalign
(
kMaceAlignment
,
nbytes
);
if
(
data
==
NULL
)
{
LOG
(
WARNING
)
<<
"Allocate CPU Buffer with "
<<
nbytes
<<
" bytes failed because of"
<<
strerror
(
errno
);
*
result
=
nullptr
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
}
#else
MACE_CHECK
(
posix_memalign
(
&
data
,
kMaceAlignment
,
nbytes
)
==
0
);
int
ret
=
posix_memalign
(
&
data
,
kMaceAlignment
,
nbytes
);
if
(
ret
!=
0
)
{
LOG
(
WARNING
)
<<
"Allocate CPU Buffer with "
<<
nbytes
<<
" bytes failed because of"
<<
strerror
(
errno
);
*
result
=
nullptr
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
}
#endif
MACE_CHECK_NOTNULL
(
data
);
// TODO(heliangliang) This should be avoided sometimes
memset
(
data
,
0
,
nbytes
);
return
data
;
*
result
=
data
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
shape
,
const
DataType
dt
)
const
override
{
MaceStatus
NewImage
(
const
std
::
vector
<
size_t
>
&
shape
,
const
DataType
dt
,
void
**
result
)
const
override
{
MACE_UNUSED
(
shape
);
MACE_UNUSED
(
dt
);
MACE_UNUSED
(
result
);
LOG
(
FATAL
)
<<
"Allocate CPU image"
;
return
nullptr
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Delete
(
void
*
data
)
const
override
{
MACE_CHECK_NOTNULL
(
data
);
VLOG
(
3
)
<<
"Free CPU buffer"
;
free
(
data
);
}
...
...
mace/core/buffer.h
浏览文件 @
e7a89f6f
...
...
@@ -38,6 +38,11 @@ class BufferBase {
virtual
void
*
raw_mutable_data
()
=
0
;
virtual
MaceStatus
Allocate
(
index_t
nbytes
)
=
0
;
virtual
MaceStatus
Allocate
(
const
std
::
vector
<
size_t
>
&
shape
,
DataType
data_type
)
=
0
;
virtual
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
=
0
;
...
...
@@ -48,7 +53,7 @@ class BufferBase {
virtual
void
UnMap
()
=
0
;
virtual
void
Resize
(
index_t
size
)
=
0
;
virtual
MaceStatus
Resize
(
index_t
nbytes
)
=
0
;
virtual
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
=
0
;
...
...
@@ -83,14 +88,6 @@ class Buffer : public BufferBase {
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{}
Buffer
(
Allocator
*
allocator
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{
buf_
=
allocator
->
New
(
size
);
}
Buffer
(
Allocator
*
allocator
,
void
*
data
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
...
...
@@ -132,6 +129,31 @@ class Buffer : public BufferBase {
}
}
MaceStatus
Allocate
(
index_t
nbytes
)
{
if
(
nbytes
<=
0
)
{
return
MaceStatus
::
MACE_SUCCESS
;
}
MACE_CHECK
(
is_data_owner_
,
"data is not owned by this buffer, cannot reallocate"
);
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
}
if
(
buf_
!=
nullptr
)
{
allocator_
->
Delete
(
buf_
);
}
size_
=
nbytes
;
return
allocator_
->
New
(
nbytes
,
&
buf_
);
}
MaceStatus
Allocate
(
const
std
::
vector
<
size_t
>
&
shape
,
DataType
data_type
)
{
if
(
shape
.
empty
())
return
MaceStatus
::
MACE_SUCCESS
;
index_t
nbytes
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
size_t
>
())
*
GetEnumTypeSize
(
data_type
);
return
this
->
Allocate
(
nbytes
);
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_CHECK_NOTNULL
(
buf_
);
MACE_UNUSED
(
pitch
);
...
...
@@ -154,16 +176,17 @@ class Buffer : public BufferBase {
mapped_buf_
=
nullptr
;
}
void
Resize
(
index_t
size
)
{
MaceStatus
Resize
(
index_t
nbytes
)
{
MACE_CHECK
(
is_data_owner_
,
"data is not owned by this buffer, cannot resize"
);
if
(
size
!=
size_
)
{
if
(
nbytes
!=
size_
)
{
if
(
buf_
!=
nullptr
)
{
allocator_
->
Delete
(
buf_
);
}
size_
=
size
;
buf_
=
allocator_
->
New
(
size
);
size_
=
nbytes
;
return
allocator_
->
New
(
nbytes
,
&
buf_
);
}
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
...
...
@@ -195,18 +218,6 @@ class Image : public BufferBase {
buf_
(
nullptr
),
mapped_buf_
(
nullptr
)
{}
Image
(
std
::
vector
<
size_t
>
shape
,
DataType
data_type
)
:
BufferBase
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
())
*
GetEnumTypeSize
(
data_type
)),
allocator_
(
GetDeviceAllocator
(
GPU
)),
mapped_buf_
(
nullptr
)
{
shape_
=
shape
;
data_type_
=
data_type
;
buf_
=
allocator_
->
NewImage
(
shape
,
data_type
);
}
virtual
~
Image
()
{
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
...
...
@@ -233,6 +244,29 @@ class Image : public BufferBase {
std
::
vector
<
size_t
>
image_shape
()
const
{
return
shape_
;
}
MaceStatus
Allocate
(
index_t
nbytes
)
{
MACE_UNUSED
(
nbytes
);
LOG
(
FATAL
)
<<
"Image should not call this allocate function"
;
return
MaceStatus
::
MACE_SUCCESS
;
}
MaceStatus
Allocate
(
const
std
::
vector
<
size_t
>
&
shape
,
DataType
data_type
)
{
index_t
size
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
())
*
GetEnumTypeSize
(
data_type
);
if
(
mapped_buf_
!=
nullptr
)
{
UnMap
();
}
if
(
buf_
!=
nullptr
)
{
allocator_
->
DeleteImage
(
buf_
);
}
size_
=
size
;
shape_
=
shape
;
data_type_
=
data_type
;
return
allocator_
->
NewImage
(
shape
,
data_type
,
&
buf_
);
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
...
...
@@ -259,9 +293,10 @@ class Image : public BufferBase {
mapped_buf_
=
nullptr
;
}
void
Resize
(
index_t
size
)
{
MaceStatus
Resize
(
index_t
size
)
{
MACE_UNUSED
(
size
);
MACE_NOT_IMPLEMENTED
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
...
...
@@ -339,6 +374,20 @@ class BufferSlice : public BufferBase {
}
}
MaceStatus
Allocate
(
index_t
size
)
{
MACE_UNUSED
(
size
);
LOG
(
FATAL
)
<<
"BufferSlice should not call allocate function"
;
return
MaceStatus
::
MACE_SUCCESS
;
}
MaceStatus
Allocate
(
const
std
::
vector
<
size_t
>
&
shape
,
DataType
data_type
)
{
MACE_UNUSED
(
shape
);
MACE_UNUSED
(
data_type
);
LOG
(
FATAL
)
<<
"BufferSlice should not call allocate function"
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
...
...
@@ -364,9 +413,10 @@ class BufferSlice : public BufferBase {
mapped_buf_
=
nullptr
;
}
void
Resize
(
index_t
size
)
{
MaceStatus
Resize
(
index_t
size
)
{
MACE_CHECK
(
size
==
size_
,
"resize buffer slice from "
,
size_
,
" to "
,
size
,
" is illegal"
);
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
...
...
@@ -396,20 +446,17 @@ class ScratchBuffer: public Buffer {
:
Buffer
(
allocator
),
offset_
(
0
)
{}
ScratchBuffer
(
Allocator
*
allocator
,
index_t
size
)
:
Buffer
(
allocator
,
size
),
offset_
(
0
)
{}
ScratchBuffer
(
Allocator
*
allocator
,
void
*
data
,
index_t
size
)
:
Buffer
(
allocator
,
data
,
size
),
offset_
(
0
)
{}
virtual
~
ScratchBuffer
()
{}
void
GrowSize
(
index_t
size
)
{
MaceStatus
GrowSize
(
index_t
size
)
{
if
(
size
>
size_
)
{
Resize
(
size
);
return
Resize
(
size
);
}
return
MaceStatus
::
MACE_SUCCESS
;
}
BufferSlice
Scratch
(
index_t
size
)
{
...
...
mace/core/mace.cc
浏览文件 @
e7a89f6f
...
...
@@ -82,12 +82,14 @@ std::shared_ptr<float> MaceTensor::data() { return impl_->data; }
// Mace Engine
class
MaceEngine
::
Impl
{
public:
explicit
Impl
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
);
explicit
Impl
(
DeviceType
device_type
);
~
Impl
();
MaceStatus
Init
(
const
NetDef
*
net_def
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
);
MaceStatus
Run
(
const
std
::
map
<
std
::
string
,
MaceTensor
>
&
inputs
,
std
::
map
<
std
::
string
,
MaceTensor
>
*
outputs
,
RunMetadata
*
run_metadata
);
...
...
@@ -104,10 +106,8 @@ class MaceEngine::Impl {
DISABLE_COPY_AND_ASSIGN
(
Impl
);
};
MaceEngine
::
Impl
::
Impl
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
MaceEngine
::
Impl
::
Impl
(
DeviceType
device_type
)
:
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
...
...
@@ -115,7 +115,12 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
#ifdef MACE_ENABLE_HEXAGON
,
hexagon_controller_
(
nullptr
)
#endif
{
{}
MaceStatus
MaceEngine
::
Impl
::
Init
(
const
NetDef
*
net_def
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
{
LOG
(
INFO
)
<<
"MACE version: "
<<
MaceVersion
();
// Set storage path for internal usage
for
(
auto
input_name
:
input_nodes
)
{
...
...
@@ -127,7 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
#ifdef MACE_ENABLE_HEXAGON
if
(
device_type
==
HEXAGON
)
{
if
(
device_type
_
==
HEXAGON
)
{
hexagon_controller_
.
reset
(
new
HexagonControlWrapper
());
MACE_CHECK
(
hexagon_controller_
->
Config
(),
"hexagon config error"
);
MACE_CHECK
(
hexagon_controller_
->
Init
(),
"hexagon init error"
);
...
...
@@ -143,18 +148,22 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
}
}
else
{
#endif
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
MaceStatus
status
=
ws_
->
LoadModelTensor
(
*
net_def
,
device_type_
);
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
return
status
;
}
// Init model
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
// Init model
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
_
,
NetMode
::
INIT
);
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
net_
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
);
net_
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
_
);
#ifdef MACE_ENABLE_HEXAGON
}
#endif
return
MaceStatus
::
MACE_SUCCESS
;
}
MaceEngine
::
Impl
::~
Impl
()
{
...
...
@@ -244,16 +253,17 @@ MaceStatus MaceEngine::Impl::Run(
return
MACE_SUCCESS
;
}
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
{
impl_
=
std
::
unique_ptr
<
MaceEngine
::
Impl
>
(
new
MaceEngine
::
Impl
(
net_def
,
device_type
,
input_nodes
,
output_nodes
));
}
MaceEngine
::
MaceEngine
(
DeviceType
device_type
)
:
impl_
(
new
MaceEngine
::
Impl
(
device_type
))
{}
MaceEngine
::~
MaceEngine
()
=
default
;
MaceStatus
MaceEngine
::
Init
(
const
NetDef
*
net_def
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
{
return
impl_
->
Init
(
net_def
,
input_nodes
,
output_nodes
);
}
MaceStatus
MaceEngine
::
Run
(
const
std
::
map
<
std
::
string
,
MaceTensor
>
&
inputs
,
std
::
map
<
std
::
string
,
MaceTensor
>
*
outputs
,
RunMetadata
*
run_metadata
)
{
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
e7a89f6f
...
...
@@ -44,18 +44,30 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
OpenCLAllocator
::
OpenCLAllocator
()
{}
OpenCLAllocator
::~
OpenCLAllocator
()
{}
void
*
OpenCLAllocator
::
New
(
size_t
nbytes
)
const
{
MaceStatus
OpenCLAllocator
::
New
(
size_t
nbytes
,
void
**
result
)
const
{
if
(
nbytes
==
0
)
{
return
MaceStatus
::
MACE_SUCCESS
;
}
VLOG
(
3
)
<<
"Allocate OpenCL buffer: "
<<
nbytes
;
cl_int
error
;
cl
::
Buffer
*
buffer
=
new
cl
::
Buffer
(
OpenCLRuntime
::
Global
()
->
context
(),
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
nbytes
,
nullptr
,
&
error
);
MACE_CHECK_CL_SUCCESS
(
error
);
return
static_cast
<
void
*>
(
buffer
);
if
(
error
!=
CL_SUCCESS
)
{
LOG
(
WARNING
)
<<
"Allocate OpenCL Buffer with "
<<
nbytes
<<
" bytes failed because of"
<<
OpenCLErrorToString
(
error
);
*
result
=
nullptr
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
}
else
{
*
result
=
buffer
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
void
*
OpenCLAllocator
::
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
const
{
MaceStatus
OpenCLAllocator
::
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
,
void
**
result
)
const
{
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Image shape's size must equal 2"
;
VLOG
(
3
)
<<
"Allocate OpenCL image: "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
];
...
...
@@ -67,11 +79,17 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
new
cl
::
Image2D
(
OpenCLRuntime
::
Global
()
->
context
(),
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
img_format
,
image_shape
[
0
],
image_shape
[
1
],
0
,
nullptr
,
&
error
);
MACE_CHECK_CL_SUCCESS
(
error
)
<<
" with image shape: ["
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
]
<<
"]"
;
return
cl_image
;
if
(
error
!=
CL_SUCCESS
)
{
LOG
(
WARNING
)
<<
"Allocate OpenCL image with shape: ["
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
]
<<
"] failed because of"
<<
OpenCLErrorToString
(
error
);
*
result
=
nullptr
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
}
else
{
*
result
=
cl_image
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
void
OpenCLAllocator
::
Delete
(
void
*
buffer
)
const
{
...
...
mace/core/runtime/opencl/opencl_allocator.h
浏览文件 @
e7a89f6f
...
...
@@ -27,15 +27,16 @@ class OpenCLAllocator : public Allocator {
~
OpenCLAllocator
()
override
;
void
*
New
(
size_t
nbytes
)
const
override
;
MaceStatus
New
(
size_t
nbytes
,
void
**
result
)
const
override
;
/*
* Use Image2D with RGBA (128-bit) format to represent the image.
*
* @ shape : [depth, ..., height, width ].
*/
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
const
override
;
MaceStatus
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
,
void
**
result
)
const
override
;
void
Delete
(
void
*
buffer
)
const
override
;
...
...
mace/core/tensor.h
浏览文件 @
e7a89f6f
...
...
@@ -216,16 +216,19 @@ class Tensor {
MACE_CHECK
(
raw_size
()
<=
buffer_
->
size
());
}
inline
void
Resize
(
const
std
::
vector
<
index_t
>
&
shape
)
{
inline
MaceStatus
Resize
(
const
std
::
vector
<
index_t
>
&
shape
)
{
shape_
=
shape
;
image_shape_
.
clear
();
if
(
buffer_
!=
nullptr
)
{
MACE_CHECK
(
!
has_opencl_image
(),
"Cannot resize image, use ResizeImage."
);
if
(
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
>
buffer_
->
size
())
buffer_
->
Resize
(
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
);
if
(
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
>
buffer_
->
size
())
{
return
buffer_
->
Resize
(
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
);
}
return
MaceStatus
::
MACE_SUCCESS
;
}
else
{
MACE_CHECK
(
is_buffer_owner_
);
buffer_
=
new
Buffer
(
allocator_
,
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
);
buffer_
=
new
Buffer
(
allocator_
);
return
buffer_
->
Allocate
(
raw_size
()
+
EXTRA_BUFFER_PAD_SIZE
);
}
}
...
...
@@ -241,13 +244,14 @@ class Tensor {
is_buffer_owner_
=
false
;
}
inline
void
ResizeImage
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
size_t
>
&
image_shape
)
{
inline
MaceStatus
ResizeImage
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
size_t
>
&
image_shape
)
{
shape_
=
shape
;
image_shape_
=
image_shape
;
if
(
buffer_
==
nullptr
)
{
MACE_CHECK
(
is_buffer_owner_
);
buffer_
=
new
Image
(
image_shape
,
dtype_
);
buffer_
=
new
Image
();
return
buffer_
->
Allocate
(
image_shape
,
dtype_
);
}
else
{
MACE_CHECK
(
has_opencl_image
(),
"Cannot ResizeImage buffer, use Resize."
);
Image
*
image
=
dynamic_cast
<
Image
*>
(
buffer_
);
...
...
@@ -257,24 +261,27 @@ class Tensor {
"): current physical image shape: "
,
image
->
image_shape
()[
0
],
", "
,
image
->
image_shape
()[
1
],
" < logical image shape: "
,
image_shape
[
0
],
", "
,
image_shape
[
1
]);
return
MaceStatus
::
MACE_SUCCESS
;
}
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
ResizeLike
(
&
other
);
}
inline
MaceStatus
ResizeLike
(
const
Tensor
&
other
)
{
return
ResizeLike
(
&
other
);
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
inline
MaceStatus
ResizeLike
(
const
Tensor
*
other
)
{
if
(
other
->
has_opencl_image
())
{
if
(
is_buffer_owner_
&&
buffer_
!=
nullptr
&&
!
has_opencl_image
())
{
delete
buffer_
;
buffer_
=
nullptr
;
}
ResizeImage
(
other
->
shape
(),
other
->
image_shape_
);
return
ResizeImage
(
other
->
shape
(),
other
->
image_shape_
);
}
else
{
if
(
is_buffer_owner_
&&
buffer_
!=
nullptr
&&
has_opencl_image
())
{
delete
buffer_
;
buffer_
=
nullptr
;
}
Resize
(
other
->
shape
());
return
Resize
(
other
->
shape
());
}
}
...
...
mace/core/workspace.cc
浏览文件 @
e7a89f6f
...
...
@@ -60,7 +60,7 @@ std::vector<std::string> Workspace::Tensors() const {
return
names
;
}
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
MaceStatus
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
MACE_LATENCY_LOGGER
(
1
,
"Load model tensors"
);
index_t
model_data_size
=
0
;
unsigned
char
*
model_data_ptr
=
nullptr
;
...
...
@@ -89,7 +89,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
model_data_size
));
}
else
{
tensor_buffer_
=
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_size
));
new
Buffer
(
GetDeviceAllocator
(
type
)));
MaceStatus
status
=
tensor_buffer_
->
Allocate
(
model_data_size
);
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
return
status
;
}
tensor_buffer_
->
Map
(
nullptr
);
tensor_buffer_
->
Copy
(
model_data_ptr
,
0
,
model_data_size
);
tensor_buffer_
->
UnMap
();
...
...
@@ -120,14 +124,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
}
if
(
type
==
DeviceType
::
CPU
||
type
==
DeviceType
::
GPU
)
{
CreateOutputTensorBuffer
(
net_def
,
type
);
MaceStatus
status
=
CreateOutputTensorBuffer
(
net_def
,
type
);
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
return
status
;
}
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Workspace
::
CreateOutputTensorBuffer
(
const
NetDef
&
net_def
,
DeviceType
device_type
)
{
MaceStatus
Workspace
::
CreateOutputTensorBuffer
(
const
NetDef
&
net_def
,
DeviceType
device_type
)
{
if
(
!
net_def
.
has_mem_arena
()
||
net_def
.
mem_arena
().
mem_block_size
()
==
0
)
{
return
;
return
MaceStatus
::
MACE_SUCCESS
;
}
DataType
dtype
=
DataType
::
DT_INVALID
;
...
...
@@ -157,14 +163,24 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
// TODO(liuqi): refactor based on PB
if
(
mem_block
.
mem_id
()
>=
20000
)
{
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
new
Image
());
MaceStatus
status
=
image_buf
->
Allocate
(
{
mem_block
.
x
(),
mem_block
.
y
()},
dtype
);
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
return
status
;
}
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
image_buf
));
}
}
else
{
if
(
mem_block
.
mem_id
()
<
20000
)
{
std
::
unique_ptr
<
BufferBase
>
tensor_buf
(
new
Buffer
(
GetDeviceAllocator
(
device_type
),
mem_block
.
x
()));
new
Buffer
(
GetDeviceAllocator
(
device_type
)));
MaceStatus
status
=
tensor_buf
->
Allocate
(
mem_block
.
x
()
*
GetEnumTypeSize
(
dtype
));
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
return
status
;
}
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
tensor_buf
));
}
...
...
@@ -201,6 +217,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
}
}
}
return
MaceStatus
::
MACE_SUCCESS
;
}
ScratchBuffer
*
Workspace
::
GetScratchBuffer
(
DeviceType
device_type
)
{
...
...
mace/core/workspace.h
浏览文件 @
e7a89f6f
...
...
@@ -47,12 +47,13 @@ class Workspace {
std
::
vector
<
std
::
string
>
Tensors
()
const
;
void
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
);
MaceStatus
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
);
ScratchBuffer
*
GetScratchBuffer
(
DeviceType
device_type
);
private:
void
CreateOutputTensorBuffer
(
const
NetDef
&
net_def
,
DeviceType
device_type
);
MaceStatus
CreateOutputTensorBuffer
(
const
NetDef
&
net_def
,
DeviceType
device_type
);
TensorMap
tensor_map_
;
...
...
mace/kernels/conv_2d.h
浏览文件 @
e7a89f6f
...
...
@@ -99,7 +99,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
in_shape
[
0
];
b
++
)
{
for
(
index_t
m
=
0
;
m
<
filter_shape
[
0
];
m
+=
4
)
{
const
index_t
in_height
=
in_shape
[
2
];
const
index_t
in_width
=
in_shape
[
3
];
const
index_t
out_height
=
out_shape
[
2
];
const
index_t
out_width
=
out_shape
[
3
];
...
...
@@ -322,8 +321,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
index_t
dilation_h
=
dilations_
[
0
];
index_t
dilation_w
=
dilations_
[
1
];
const
index_t
filter_hw
[
2
]
=
{
filter_h
,
filter_w
};
MACE_CHECK
(
batch
==
input_batch
,
"Input/Output batch size mismatch"
);
index_t
padded_input_height
=
input_height
+
paddings
[
0
];
...
...
mace/kernels/opencl/activation.cc
浏览文件 @
e7a89f6f
...
...
@@ -45,7 +45,8 @@ void ActivationFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
e7a89f6f
...
...
@@ -58,7 +58,8 @@ void AddNFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/batch_norm.cc
浏览文件 @
e7a89f6f
...
...
@@ -56,7 +56,8 @@ void BatchNormFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/bias_add.cc
浏览文件 @
e7a89f6f
...
...
@@ -49,7 +49,8 @@ void BiasAddFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
e7a89f6f
...
...
@@ -93,7 +93,8 @@ void BufferToImageFunctor<DeviceType::GPU, T>::operator()(
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
if
(
!
kernel_error_
)
{
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/channel_shuffle.cc
浏览文件 @
e7a89f6f
...
...
@@ -56,7 +56,8 @@ void ChannelShuffleFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
e7a89f6f
...
...
@@ -67,7 +67,8 @@ static void Concat2(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
@@ -148,7 +149,8 @@ static void ConcatN(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/conv_2d_1x1.cc
浏览文件 @
e7a89f6f
...
...
@@ -100,7 +100,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/conv_2d_3x3.cc
浏览文件 @
e7a89f6f
...
...
@@ -86,7 +86,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/conv_2d_general.cc
浏览文件 @
e7a89f6f
...
...
@@ -94,7 +94,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/deconv_2d_opencl.cc
浏览文件 @
e7a89f6f
...
...
@@ -65,7 +65,8 @@ void Deconv2dOpencl(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/depth_to_space.cc
浏览文件 @
e7a89f6f
...
...
@@ -86,7 +86,8 @@ void DepthToSpaceOpFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/depthwise_conv.cc
浏览文件 @
e7a89f6f
...
...
@@ -97,7 +97,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/eltwise.cc
浏览文件 @
e7a89f6f
...
...
@@ -97,7 +97,8 @@ void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/fully_connected.cc
浏览文件 @
e7a89f6f
...
...
@@ -74,7 +74,8 @@ void FCWXKernel(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
@@ -200,7 +201,8 @@ void FCWTXKernel(cl::Kernel *kernel,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
*
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
(
*
kernel_error
)
->
Allocate
(
1
);
(
*
kernel_error
)
->
Map
(
nullptr
);
*
((
*
kernel_error
)
->
mutable_data
<
char
>
())
=
0
;
(
*
kernel_error
)
->
UnMap
();
...
...
mace/kernels/opencl/image_to_buffer.cc
浏览文件 @
e7a89f6f
...
...
@@ -86,7 +86,8 @@ void ImageToBufferFunctor<DeviceType::GPU, T>::operator()(
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
if
(
!
kernel_error_
)
{
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
e7a89f6f
...
...
@@ -54,7 +54,8 @@ void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/out_of_range_check_test.cc
浏览文件 @
e7a89f6f
...
...
@@ -57,7 +57,8 @@ bool BufferToImageOpImpl(Tensor *buffer,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error
->
Allocate
(
1
);
kernel_error
->
Map
(
nullptr
);
*
(
kernel_error
->
mutable_data
<
char
>
())
=
0
;
kernel_error
->
UnMap
();
...
...
mace/kernels/opencl/pad.cc
浏览文件 @
e7a89f6f
...
...
@@ -60,7 +60,8 @@ void PadFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/pooling.cc
浏览文件 @
e7a89f6f
...
...
@@ -72,7 +72,8 @@ void PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/resize_bilinear.cc
浏览文件 @
e7a89f6f
...
...
@@ -78,7 +78,8 @@ void ResizeBilinearFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/slice.cc
浏览文件 @
e7a89f6f
...
...
@@ -51,7 +51,8 @@ void SliceFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/softmax.cc
浏览文件 @
e7a89f6f
...
...
@@ -70,7 +70,8 @@ void SoftmaxFunctor<DeviceType::GPU, T>::operator()(const Tensor *logits,
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/space_to_batch.cc
浏览文件 @
e7a89f6f
...
...
@@ -70,7 +70,8 @@ void SpaceToBatchFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
e7a89f6f
...
...
@@ -39,7 +39,8 @@ void WinogradTransformFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
@@ -138,7 +139,8 @@ void WinogradInverseTransformFunctor<DeviceType::GPU, T>::operator()(
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
built_options
.
emplace
(
"-DOUT_OF_RANGE_CHECK"
);
kernel_error_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
),
1
)));
new
Buffer
(
GetDeviceAllocator
(
DeviceType
::
GPU
))));
kernel_error_
->
Allocate
(
1
);
kernel_error_
->
Map
(
nullptr
);
*
(
kernel_error_
->
mutable_data
<
char
>
())
=
0
;
kernel_error_
->
UnMap
();
...
...
mace/public/mace.h
浏览文件 @
e7a89f6f
...
...
@@ -30,7 +30,11 @@ const char *MaceVersion();
enum
DeviceType
{
CPU
=
0
,
GPU
=
2
,
HEXAGON
=
3
};
enum
MaceStatus
{
MACE_SUCCESS
=
0
,
MACE_INVALID_ARGS
=
1
};
enum
MaceStatus
{
MACE_SUCCESS
=
0
,
MACE_INVALID_ARGS
=
1
,
MACE_OUT_OF_RESOURCES
=
2
};
// MACE input/output tensor
class
MaceTensor
{
...
...
@@ -61,12 +65,13 @@ class RunMetadata;
class
MaceEngine
{
public:
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
);
explicit
MaceEngine
(
DeviceType
device_type
);
~
MaceEngine
();
MaceStatus
Init
(
const
NetDef
*
net_def
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
);
MaceStatus
Run
(
const
std
::
map
<
std
::
string
,
MaceTensor
>
&
inputs
,
std
::
map
<
std
::
string
,
MaceTensor
>
*
outputs
);
...
...
mace/python/tools/mace_engine_factory.h.jinja2
浏览文件 @
e7a89f6f
...
...
@@ -61,24 +61,25 @@ MaceStatus CreateMaceEngine(
}
const unsigned char * model_data = nullptr;
NetDef net_def;
MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %}
case {{ i }}:
model_data =
mace::{{model_tags[i]}}::LoadModelData(model_data_file);
net_def = mace::{{model_tags[i]}}::CreateNet(model_data);
engine->reset(
new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)
);
engine->reset(
new mace::MaceEngine(device_type));
status = (*engine)->Init(&net_def, input_nodes, output_nodes
);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::{{model_tags[i]}}::UnloadModelData(model_data);
}
break;
{% endfor %}
default:
return
MaceStatus::MACE_INVALID_ARGS;
status =
MaceStatus::MACE_INVALID_ARGS;
}
return
MaceStatus::MACE_SUCCESS
;
return
status
;
}
} // namespace mace
mace/test/mace_api_mt_test.cc
浏览文件 @
e7a89f6f
...
...
@@ -304,7 +304,9 @@ void MaceRunFunc(const int in_out_size) {
new
FileStorageFactory
(
file_path
));
mace
::
SetKVStorageFactory
(
storage_factory
);
MaceEngine
engine
(
&
net_def
,
device
,
input_names
,
output_names
);
MaceEngine
engine
(
device
);
MaceStatus
status
=
engine
.
Init
(
&
net_def
,
input_names
,
output_names
);
ASSERT_EQ
(
status
,
MaceStatus
::
MACE_SUCCESS
);
std
::
map
<
std
::
string
,
mace
::
MaceTensor
>
inputs
;
std
::
map
<
std
::
string
,
mace
::
MaceTensor
>
outputs
;
...
...
mace/test/mace_api_test.cc
浏览文件 @
e7a89f6f
...
...
@@ -308,7 +308,9 @@ void MaceRun(const int in_out_size,
&
net_def
);
}
MaceEngine
engine
(
&
net_def
,
device
,
input_names
,
output_names
);
MaceEngine
engine
(
device
);
MaceStatus
status
=
engine
.
Init
(
&
net_def
,
input_names
,
output_names
);
ASSERT_EQ
(
status
,
MaceStatus
::
MACE_SUCCESS
);
std
::
map
<
std
::
string
,
mace
::
MaceTensor
>
inputs
;
std
::
map
<
std
::
string
,
mace
::
MaceTensor
>
outputs
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录