Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6b9aafd4
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
6b9aafd4
编写于
3月 08, 2018
作者:
叶
叶剑武
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'cpplint' into 'master'
Reformatting code and enable cpplint See merge request !273
上级
b26187f0
4410ecd2
变更
128
隐藏空白更改
内联
并排
Showing
128 changed file
with
1634 addition
and
1831 deletion
+1634
-1831
.gitlab-ci.yml
.gitlab-ci.yml
+9
-0
mace/core/allocator.h
mace/core/allocator.h
+2
-2
mace/core/arg_helper.cc
mace/core/arg_helper.cc
+2
-2
mace/core/buffer.h
mace/core/buffer.h
+55
-88
mace/core/mace.cc
mace/core/mace.cc
+134
-291
mace/core/net.cc
mace/core/net.cc
+7
-7
mace/core/operator.h
mace/core/operator.h
+2
-2
mace/core/preallocated_pooled_allocator.h
mace/core/preallocated_pooled_allocator.h
+2
-2
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
+69
-83
mace/core/runtime/hexagon/hexagon_control_wrapper.h
mace/core/runtime/hexagon/hexagon_control_wrapper.h
+8
-10
mace/core/runtime/hexagon/hexagon_controller_dummy.cc
mace/core/runtime/hexagon/hexagon_controller_dummy.cc
+140
-26
mace/core/runtime/hexagon/hexagon_nn.h
mace/core/runtime/hexagon/hexagon_nn.h
+135
-67
mace/core/runtime/hexagon/hexagon_nn_ops.h
mace/core/runtime/hexagon/hexagon_nn_ops.h
+5
-5
mace/core/runtime/hexagon/ops.h
mace/core/runtime/hexagon/ops.h
+0
-1
mace/core/runtime/hexagon/quantize.cc
mace/core/runtime/hexagon/quantize.cc
+5
-5
mace/core/runtime/hexagon/quantize.h
mace/core/runtime/hexagon/quantize.h
+11
-7
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+11
-16
mace/core/runtime/opencl/opencl_development.cc
mace/core/runtime/opencl/opencl_development.cc
+3
-2
mace/core/runtime/opencl/opencl_production.cc
mace/core/runtime/opencl/opencl_production.cc
+2
-1
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+16
-21
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+3
-1
mace/core/runtime/opencl/opencl_wrapper.h
mace/core/runtime/opencl/opencl_wrapper.h
+4
-4
mace/core/tensor.h
mace/core/tensor.h
+38
-57
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+1
-3
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+1
-1
mace/core/types.cc
mace/core/types.cc
+7
-13
mace/core/workspace.cc
mace/core/workspace.cc
+36
-39
mace/core/workspace.h
mace/core/workspace.h
+2
-2
mace/kernels/activation.h
mace/kernels/activation.h
+12
-10
mace/kernels/addn.h
mace/kernels/addn.h
+1
-1
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+4
-6
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+1
-3
mace/kernels/buffer_to_image.h
mace/kernels/buffer_to_image.h
+6
-8
mace/kernels/channel_shuffle.h
mace/kernels/channel_shuffle.h
+4
-2
mace/kernels/concat.h
mace/kernels/concat.h
+10
-10
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+6
-6
mace/kernels/conv_pool_2d_util.cc
mace/kernels/conv_pool_2d_util.cc
+21
-10
mace/kernels/conv_pool_2d_util.h
mace/kernels/conv_pool_2d_util.h
+1
-1
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+5
-4
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+11
-14
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+8
-10
mace/kernels/global_avg_pooling.h
mace/kernels/global_avg_pooling.h
+4
-2
mace/kernels/matmul.h
mace/kernels/matmul.h
+1
-4
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+2
-1
mace/kernels/neon/conv_2d_neon.cc
mace/kernels/neon/conv_2d_neon.cc
+4
-5
mace/kernels/neon/conv_2d_neon_3x3.cc
mace/kernels/neon/conv_2d_neon_3x3.cc
+4
-8
mace/kernels/neon/depthwise_conv_neon.cc
mace/kernels/neon/depthwise_conv_neon.cc
+4
-5
mace/kernels/opencl/activation_opencl.cc
mace/kernels/opencl/activation_opencl.cc
+3
-4
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+8
-16
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+3
-6
mace/kernels/opencl/bias_add_opencl.cc
mace/kernels/opencl/bias_add_opencl.cc
+8
-13
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+28
-26
mace/kernels/opencl/cl/common.h
mace/kernels/opencl/cl/common.h
+2
-2
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+24
-29
mace/kernels/opencl/conv_2d_opencl.cc
mace/kernels/opencl/conv_2d_opencl.cc
+19
-18
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+5
-11
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+5
-10
mace/kernels/opencl/conv_2d_opencl_general.cc
mace/kernels/opencl/conv_2d_opencl_general.cc
+5
-10
mace/kernels/opencl/depthwise_conv_opencl.cc
mace/kernels/opencl/depthwise_conv_opencl.cc
+11
-12
mace/kernels/opencl/eltwise_opencl.cc
mace/kernels/opencl/eltwise_opencl.cc
+6
-14
mace/kernels/opencl/fully_connected_opencl.cc
mace/kernels/opencl/fully_connected_opencl.cc
+12
-23
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+40
-48
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+6
-7
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+9
-18
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+9
-18
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+9
-11
mace/kernels/opencl/softmax_opencl.cc
mace/kernels/opencl/softmax_opencl.cc
+6
-11
mace/kernels/opencl/space_to_batch_opencl.cc
mace/kernels/opencl/space_to_batch_opencl.cc
+16
-15
mace/kernels/opencl/winograd_transform.cc
mace/kernels/opencl/winograd_transform.cc
+53
-48
mace/kernels/pooling.h
mace/kernels/pooling.h
+25
-26
mace/kernels/reshape.h
mace/kernels/reshape.h
+1
-2
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+11
-10
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+11
-11
mace/kernels/winograd_transform.h
mace/kernels/winograd_transform.h
+22
-22
mace/ops/activation.h
mace/ops/activation.h
+2
-1
mace/ops/activation_test.cc
mace/ops/activation_test.cc
+3
-4
mace/ops/addn.h
mace/ops/addn.h
+4
-6
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+1
-2
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+1
-1
mace/ops/batch_to_space.h
mace/ops/batch_to_space.h
+14
-13
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+1
-1
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+1
-1
mace/ops/buffer_to_image.h
mace/ops/buffer_to_image.h
+5
-4
mace/ops/buffer_to_image_test.cc
mace/ops/buffer_to_image_test.cc
+45
-26
mace/ops/channel_shuffle.h
mace/ops/channel_shuffle.h
+2
-2
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+1
-1
mace/ops/concat.h
mace/ops/concat.h
+3
-2
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+8
-11
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+4
-4
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+47
-57
mace/ops/eltwise.h
mace/ops/eltwise.h
+6
-4
mace/ops/eltwise_benchmark.cc
mace/ops/eltwise_benchmark.cc
+1
-1
mace/ops/eltwise_test.cc
mace/ops/eltwise_test.cc
+41
-55
mace/ops/folded_batch_norm.cc
mace/ops/folded_batch_norm.cc
+12
-15
mace/ops/folded_batch_norm_test.cc
mace/ops/folded_batch_norm_test.cc
+1
-1
mace/ops/fully_connected.h
mace/ops/fully_connected.h
+4
-5
mace/ops/fully_connected_benchmark.cc
mace/ops/fully_connected_benchmark.cc
+16
-12
mace/ops/fully_connected_test.cc
mace/ops/fully_connected_test.cc
+32
-66
mace/ops/fused_conv_2d_test.cc
mace/ops/fused_conv_2d_test.cc
+24
-16
mace/ops/global_avg_pooling.h
mace/ops/global_avg_pooling.h
+1
-1
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+2
-2
mace/ops/image_to_buffer.h
mace/ops/image_to_buffer.h
+5
-4
mace/ops/matmul.h
mace/ops/matmul.h
+2
-2
mace/ops/matmul_test.cc
mace/ops/matmul_test.cc
+27
-35
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+4
-5
mace/ops/pooling.h
mace/ops/pooling.h
+6
-2
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+2
-2
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+4
-2
mace/ops/reshape.h
mace/ops/reshape.h
+5
-3
mace/ops/reshape_test.cc
mace/ops/reshape_test.cc
+0
-1
mace/ops/softmax.cc
mace/ops/softmax.cc
+3
-3
mace/ops/softmax.h
mace/ops/softmax.h
+2
-3
mace/ops/softmax_test.cc
mace/ops/softmax_test.cc
+7
-9
mace/ops/space_to_batch.h
mace/ops/space_to_batch.h
+14
-11
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+1
-1
mace/ops/winograd_convolution_test.cc
mace/ops/winograd_convolution_test.cc
+22
-14
mace/ops/winograd_inverse_transform.h
mace/ops/winograd_inverse_transform.h
+2
-2
mace/ops/winograd_transform.h
mace/ops/winograd_transform.h
+3
-3
mace/ops/winograd_transform_benchmark.cc
mace/ops/winograd_transform_benchmark.cc
+20
-22
mace/public/mace.h
mace/public/mace.h
+22
-24
mace/utils/command_line_flags.h
mace/utils/command_line_flags.h
+1
-1
mace/utils/env_time.h
mace/utils/env_time.h
+0
-1
mace/utils/logging.h
mace/utils/logging.h
+1
-1
mace/utils/string_util.h
mace/utils/string_util.h
+1
-1
mace/utils/timer.h
mace/utils/timer.h
+4
-12
mace/utils/tuner_test.cc
mace/utils/tuner_test.cc
+8
-21
mace/utils/utils.h
mace/utils/utils.h
+2
-2
mace/utils/utils_test.cc
mace/utils/utils_test.cc
+8
-21
未找到文件。
.gitlab-ci.yml
浏览文件 @
6b9aafd4
stages
:
-
ops_test
-
ops_benchmark
-
cpplint
cpplint
:
stage
:
cpplint
only
:
-
master
script
:
-
curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-
python cpplint.py --root=mace --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc | grep -vE "half.h")
ops_test
:
stage
:
ops_test
...
...
mace/core/allocator.h
浏览文件 @
6b9aafd4
...
...
@@ -9,8 +9,8 @@
#include <malloc.h>
#include "mace/core/registry.h"
#include "mace/public/mace.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
namespace
mace
{
...
...
@@ -81,7 +81,7 @@ class CPUAllocator : public Allocator {
free
(
data
);
};
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
override
{
return
(
char
*
)
buffer
+
offset
;
return
(
char
*
)
buffer
+
offset
;
}
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
...
...
mace/core/arg_helper.cc
浏览文件 @
6b9aafd4
...
...
@@ -83,12 +83,12 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false)
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname, \
enforce_lossless_conversion) \
template <> \
std::vector<T> ArgumentHelper::GetRepeatedArgument<T>(
\
std::vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string &name, const std::vector<T> &default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
} \
std::vector<T> values;
\
std::vector<T> values; \
for (const auto &v : arg_map_.at(name).fieldname()) { \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
...
...
mace/core/buffer.h
浏览文件 @
6b9aafd4
...
...
@@ -5,9 +5,9 @@
#ifndef MACE_CORE_BUFFER_H_
#define MACE_CORE_BUFFER_H_
#include "mace/core/types.h"
#include "mace/core/allocator.h"
#include <vector>
#include "mace/core/allocator.h"
#include "mace/core/types.h"
namespace
mace
{
...
...
@@ -39,23 +39,19 @@ class BufferBase {
virtual
bool
OnHost
()
const
=
0
;
virtual
index_t
offset
()
const
{
return
0
;
};
virtual
index_t
offset
()
const
{
return
0
;
};
template
<
typename
T
>
template
<
typename
T
>
const
T
*
data
()
const
{
return
reinterpret_cast
<
const
T
*>
(
raw_data
());
}
template
<
typename
T
>
template
<
typename
T
>
T
*
mutable_data
()
{
return
reinterpret_cast
<
T
*>
(
raw_mutable_data
());
}
index_t
size
()
const
{
return
size_
;
}
index_t
size
()
const
{
return
size_
;
}
protected:
index_t
size_
;
...
...
@@ -64,26 +60,26 @@ class BufferBase {
class
Buffer
:
public
BufferBase
{
public:
Buffer
(
Allocator
*
allocator
)
:
BufferBase
(
0
),
allocator_
(
allocator
),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{}
:
BufferBase
(
0
),
allocator_
(
allocator
),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{}
Buffer
(
Allocator
*
allocator
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{
:
BufferBase
(
size
),
allocator_
(
allocator
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
true
)
{
buf_
=
allocator
->
New
(
size
);
}
Buffer
(
Allocator
*
allocator
,
void
*
data
,
index_t
size
)
:
BufferBase
(
size
),
allocator_
(
allocator
),
buf_
(
data
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
false
)
{}
:
BufferBase
(
size
),
allocator_
(
allocator
),
buf_
(
data
),
mapped_buf_
(
nullptr
),
is_data_owner_
(
false
)
{}
virtual
~
Buffer
()
{
if
(
mapped_buf_
!=
nullptr
)
{
...
...
@@ -155,12 +151,10 @@ class Buffer : public BufferBase {
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
MACE_CHECK
(
length
<=
size_
,
"out of buffer"
);
memcpy
(
mapped_buf_
,
(
char
*
)
src
+
offset
,
length
);
memcpy
(
mapped_buf_
,
(
char
*
)
src
+
offset
,
length
);
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
private:
Allocator
*
allocator_
;
...
...
@@ -168,23 +162,24 @@ class Buffer : public BufferBase {
void
*
mapped_buf_
;
bool
is_data_owner_
;
DISABLE_COPY_AND_ASSIGN
(
Buffer
);
DISABLE_COPY_AND_ASSIGN
(
Buffer
);
};
class
Image
:
public
BufferBase
{
public:
Image
()
:
BufferBase
(
0
),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
)
{}
:
BufferBase
(
0
),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
buf_
(
nullptr
),
mapped_buf_
(
nullptr
)
{}
Image
(
std
::
vector
<
size_t
>
shape
,
DataType
data_type
)
:
BufferBase
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
())
*
GetEnumTypeSize
(
data_type
)),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
mapped_buf_
(
nullptr
)
{
:
BufferBase
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
index_t
>
())
*
GetEnumTypeSize
(
data_type
)),
allocator_
(
GetDeviceAllocator
(
OPENCL
)),
mapped_buf_
(
nullptr
)
{
shape_
=
shape
;
data_type_
=
data_type
;
buf_
=
allocator_
->
NewImage
(
shape
,
data_type
);
...
...
@@ -214,9 +209,7 @@ class Image : public BufferBase {
return
mapped_buf_
;
}
std
::
vector
<
size_t
>
image_shape
()
const
{
return
shape_
;
}
std
::
vector
<
size_t
>
image_shape
()
const
{
return
shape_
;
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_NOT_IMPLEMENTED
;
...
...
@@ -241,17 +234,11 @@ class Image : public BufferBase {
mapped_buf_
=
nullptr
;
};
void
Resize
(
index_t
size
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Resize
(
index_t
size
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
private:
Allocator
*
allocator_
;
...
...
@@ -260,34 +247,25 @@ class Image : public BufferBase {
void
*
buf_
;
void
*
mapped_buf_
;
DISABLE_COPY_AND_ASSIGN
(
Image
);
DISABLE_COPY_AND_ASSIGN
(
Image
);
};
class
BufferSlice
:
public
BufferBase
{
public:
BufferSlice
()
:
buffer_
(
nullptr
),
mapped_buf_
(
nullptr
),
offset_
(
0
),
length_
(
0
)
{}
:
buffer_
(
nullptr
),
mapped_buf_
(
nullptr
),
offset_
(
0
),
length_
(
0
)
{}
BufferSlice
(
BufferBase
*
buffer
,
index_t
offset
,
index_t
length
)
:
BufferBase
(
buffer
->
size
()),
buffer_
(
buffer
),
mapped_buf_
(
nullptr
),
offset_
(
offset
),
length_
(
length
)
{
:
BufferBase
(
buffer
->
size
()),
buffer_
(
buffer
),
mapped_buf_
(
nullptr
),
offset_
(
offset
),
length_
(
length
)
{
MACE_CHECK
(
offset
>=
0
,
"buffer slice offset should >= 0"
);
MACE_CHECK
(
offset
+
length
<=
size_
,
"buffer slice offset + length ("
,
offset
,
" + "
,
length
,
") should <= "
,
size_
);
MACE_CHECK
(
offset
+
length
<=
size_
,
"buffer slice offset + length ("
,
offset
,
" + "
,
length
,
") should <= "
,
size_
);
}
BufferSlice
(
const
BufferSlice
&
other
)
:
BufferSlice
(
other
.
buffer_
,
other
.
offset_
,
other
.
length_
)
{}
BufferSlice
(
const
BufferSlice
&
other
)
:
BufferSlice
(
other
.
buffer_
,
other
.
offset_
,
other
.
length_
)
{}
~
BufferSlice
()
{
if
(
buffer_
!=
nullptr
&&
mapped_buf_
!=
nullptr
)
{
...
...
@@ -303,7 +281,7 @@ class BufferSlice : public BufferBase {
const
void
*
raw_data
()
const
{
if
(
OnHost
())
{
MACE_CHECK_NOTNULL
(
buffer_
);
return
(
char
*
)
buffer_
->
raw_data
()
+
offset_
;
return
(
char
*
)
buffer_
->
raw_data
()
+
offset_
;
}
else
{
MACE_CHECK_NOTNULL
(
mapped_buf_
);
return
mapped_buf_
;
...
...
@@ -320,9 +298,7 @@ class BufferSlice : public BufferBase {
return
nullptr
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_NOT_IMPLEMENTED
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_NOT_IMPLEMENTED
;
}
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
{
MACE_CHECK_NOTNULL
(
buffer_
);
...
...
@@ -336,21 +312,13 @@ class BufferSlice : public BufferBase {
mapped_buf_
=
nullptr
;
};
void
Resize
(
index_t
size
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Resize
(
index_t
size
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
index_t
offset
()
const
{
return
offset_
;
}
index_t
offset
()
const
{
return
offset_
;
}
bool
OnHost
()
const
{
return
buffer_
->
OnHost
();
}
bool
OnHost
()
const
{
return
buffer_
->
OnHost
();
}
private:
BufferBase
*
buffer_
;
...
...
@@ -358,7 +326,6 @@ class BufferSlice : public BufferBase {
index_t
offset_
;
index_t
length_
;
};
}
#endif // MACE_CORE_BUFFER_H_
#endif
// MACE_CORE_BUFFER_H_
mace/core/mace.cc
浏览文件 @
6b9aafd4
...
...
@@ -3,9 +3,9 @@
//
#include "mace/public/mace.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/types.h"
namespace
mace
{
...
...
@@ -13,46 +13,34 @@ ConstTensor::ConstTensor(const std::string &name,
const
unsigned
char
*
data
,
const
std
::
vector
<
int64_t
>
&
dims
,
const
DataType
data_type
,
uint32_t
node_id
)
:
name_
(
name
),
data_
(
data
),
data_size_
(
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
())),
dims_
(
dims
.
begin
(),
dims
.
end
()),
data_type_
(
data_type
),
node_id_
(
node_id
)
{}
uint32_t
node_id
)
:
name_
(
name
),
data_
(
data
),
data_size_
(
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
())),
dims_
(
dims
.
begin
(),
dims
.
end
()),
data_type_
(
data_type
),
node_id_
(
node_id
)
{}
ConstTensor
::
ConstTensor
(
const
std
::
string
&
name
,
const
unsigned
char
*
data
,
const
std
::
vector
<
int64_t
>
&
dims
,
const
int
data_type
,
uint32_t
node_id
)
:
name_
(
name
),
data_
(
data
),
data_size_
(
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
())),
dims_
(
dims
.
begin
(),
dims
.
end
()),
data_type_
(
static_cast
<
DataType
>
(
data_type
)),
node_id_
(
node_id
)
{}
const
std
::
string
&
ConstTensor
::
name
()
const
{
return
name_
;
}
const
unsigned
char
*
ConstTensor
::
data
()
const
{
return
data_
;
}
int64_t
ConstTensor
::
data_size
()
const
{
return
data_size_
;
}
const
std
::
vector
<
int64_t
>
&
ConstTensor
::
dims
()
const
{
return
dims_
;
}
DataType
ConstTensor
::
data_type
()
const
{
return
data_type_
;
}
uint32_t
ConstTensor
::
node_id
()
const
{
return
node_id_
;
}
uint32_t
node_id
)
:
name_
(
name
),
data_
(
data
),
data_size_
(
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
())),
dims_
(
dims
.
begin
(),
dims
.
end
()),
data_type_
(
static_cast
<
DataType
>
(
data_type
)),
node_id_
(
node_id
)
{}
const
std
::
string
&
ConstTensor
::
name
()
const
{
return
name_
;
}
const
unsigned
char
*
ConstTensor
::
data
()
const
{
return
data_
;
}
int64_t
ConstTensor
::
data_size
()
const
{
return
data_size_
;
}
const
std
::
vector
<
int64_t
>
&
ConstTensor
::
dims
()
const
{
return
dims_
;
}
DataType
ConstTensor
::
data_type
()
const
{
return
data_type_
;
}
uint32_t
ConstTensor
::
node_id
()
const
{
return
node_id_
;
}
Argument
::
Argument
()
:
has_bits_
(
0
)
{}
...
...
@@ -73,74 +61,42 @@ void Argument::CopyFrom(const Argument &from) {
this
->
has_bits_
=
from
.
has_bits_
;
}
const
std
::
string
&
Argument
::
name
()
const
{
return
name_
;
}
void
Argument
::
set_name
(
const
std
::
string
&
value
)
{
name_
=
value
;
}
bool
Argument
::
has_f
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
Argument
::
set_has_f
()
{
has_bits_
|=
0x00000001u
;
}
float
Argument
::
f
()
const
{
return
f_
;
}
const
std
::
string
&
Argument
::
name
()
const
{
return
name_
;
}
void
Argument
::
set_name
(
const
std
::
string
&
value
)
{
name_
=
value
;
}
bool
Argument
::
has_f
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
Argument
::
set_has_f
()
{
has_bits_
|=
0x00000001u
;
}
float
Argument
::
f
()
const
{
return
f_
;
}
void
Argument
::
set_f
(
float
value
)
{
set_has_f
();
f_
=
value
;
}
bool
Argument
::
has_i
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
Argument
::
set_has_i
()
{
has_bits_
|=
0x00000002u
;
}
int64_t
Argument
::
i
()
const
{
return
i_
;
}
bool
Argument
::
has_i
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
Argument
::
set_has_i
()
{
has_bits_
|=
0x00000002u
;
}
int64_t
Argument
::
i
()
const
{
return
i_
;
}
void
Argument
::
set_i
(
int64_t
value
)
{
set_has_i
();
i_
=
value
;
}
bool
Argument
::
has_s
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
Argument
::
set_has_s
()
{
has_bits_
|=
0x00000004u
;
}
std
::
string
Argument
::
s
()
const
{
return
s_
;
}
bool
Argument
::
has_s
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
Argument
::
set_has_s
()
{
has_bits_
|=
0x00000004u
;
}
std
::
string
Argument
::
s
()
const
{
return
s_
;
}
void
Argument
::
set_s
(
const
std
::
string
&
value
)
{
set_has_s
();
s_
=
value
;
}
const
std
::
vector
<
float
>
&
Argument
::
floats
()
const
{
return
floats_
;
}
void
Argument
::
add_floats
(
float
value
)
{
floats_
.
push_back
(
value
);
}
const
std
::
vector
<
float
>
&
Argument
::
floats
()
const
{
return
floats_
;
}
void
Argument
::
add_floats
(
float
value
)
{
floats_
.
push_back
(
value
);
}
void
Argument
::
set_floats
(
const
std
::
vector
<
float
>
&
value
)
{
floats_
.
resize
(
value
.
size
());
std
::
copy
(
value
.
begin
(),
value
.
end
(),
floats_
.
begin
());
}
const
std
::
vector
<
int64_t
>
&
Argument
::
ints
()
const
{
return
ints_
;
}
void
Argument
::
add_ints
(
int64_t
value
)
{
ints_
.
push_back
(
value
);
}
const
std
::
vector
<
int64_t
>
&
Argument
::
ints
()
const
{
return
ints_
;
}
void
Argument
::
add_ints
(
int64_t
value
)
{
ints_
.
push_back
(
value
);
}
void
Argument
::
set_ints
(
const
std
::
vector
<
int64_t
>
&
value
)
{
ints_
.
resize
(
value
.
size
());
std
::
copy
(
value
.
begin
(),
value
.
end
(),
ints_
.
begin
());
}
const
std
::
vector
<
std
::
string
>
&
Argument
::
strings
()
const
{
return
strings_
;
}
const
std
::
vector
<
std
::
string
>
&
Argument
::
strings
()
const
{
return
strings_
;
}
void
Argument
::
add_strings
(
const
::
std
::
string
&
value
)
{
strings_
.
push_back
(
value
);
}
...
...
@@ -156,31 +112,21 @@ void NodeInput::CopyFrom(const NodeInput &from) {
node_id_
=
from
.
node_id
();
output_port_
=
from
.
output_port
();
}
int
NodeInput
::
node_id
()
const
{
return
node_id_
;
}
void
NodeInput
::
set_node_id
(
int
node_id
)
{
node_id_
=
node_id
;
}
int
NodeInput
::
output_port
()
const
{
return
output_port_
;
}
void
NodeInput
::
set_output_port
(
int
output_port
)
{
output_port_
=
output_port
;
}
int
NodeInput
::
node_id
()
const
{
return
node_id_
;
}
void
NodeInput
::
set_node_id
(
int
node_id
)
{
node_id_
=
node_id
;
}
int
NodeInput
::
output_port
()
const
{
return
output_port_
;
}
void
NodeInput
::
set_output_port
(
int
output_port
)
{
output_port_
=
output_port
;
}
// OutputShape
OutputShape
::
OutputShape
()
{}
OutputShape
::
OutputShape
(
const
std
::
vector
<
int64_t
>
&
dims
)
:
dims_
(
dims
.
begin
(),
dims
.
end
())
{}
OutputShape
::
OutputShape
(
const
std
::
vector
<
int64_t
>
&
dims
)
:
dims_
(
dims
.
begin
(),
dims
.
end
())
{}
void
OutputShape
::
CopyFrom
(
const
OutputShape
&
from
)
{
auto
from_dims
=
from
.
dims
();
dims_
.
resize
(
from_dims
.
size
());
std
::
copy
(
from_dims
.
begin
(),
from_dims
.
end
(),
dims_
.
begin
());
}
const
std
::
vector
<
int64_t
>
&
OutputShape
::
dims
()
const
{
return
dims_
;
}
const
std
::
vector
<
int64_t
>
&
OutputShape
::
dims
()
const
{
return
dims_
;
}
// Operator Def
void
OperatorDef
::
CopyFrom
(
const
OperatorDef
&
from
)
{
...
...
@@ -220,68 +166,38 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
}
auto
from_out_max_byte_size
=
from
.
out_max_byte_size
();
out_max_byte_size_
.
resize
(
from_out_max_byte_size
.
size
());
std
::
copy
(
from_out_max_byte_size
.
begin
(),
from_out_max_byte_size
.
end
(),
std
::
copy
(
from_out_max_byte_size
.
begin
(),
from_out_max_byte_size
.
end
(),
out_max_byte_size_
.
begin
());
has_bits_
=
from
.
has_bits_
;
}
const
std
::
string
&
OperatorDef
::
name
()
const
{
return
name_
;
}
const
std
::
string
&
OperatorDef
::
name
()
const
{
return
name_
;
}
void
OperatorDef
::
set_name
(
const
std
::
string
&
name_
)
{
set_has_name
();
OperatorDef
::
name_
=
name_
;
}
bool
OperatorDef
::
has_name
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
OperatorDef
::
set_has_name
()
{
has_bits_
|=
0x00000001u
;
}
const
std
::
string
&
OperatorDef
::
type
()
const
{
return
type_
;
}
bool
OperatorDef
::
has_name
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
OperatorDef
::
set_has_name
()
{
has_bits_
|=
0x00000001u
;
}
const
std
::
string
&
OperatorDef
::
type
()
const
{
return
type_
;
}
void
OperatorDef
::
set_type
(
const
std
::
string
&
type_
)
{
set_has_type
();
OperatorDef
::
type_
=
type_
;
}
bool
OperatorDef
::
has_type
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
OperatorDef
::
set_has_type
()
{
has_bits_
|=
0x00000002u
;
}
int
OperatorDef
::
mem_id
()
const
{
return
mem_id_
;
}
bool
OperatorDef
::
has_type
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
OperatorDef
::
set_has_type
()
{
has_bits_
|=
0x00000002u
;
}
int
OperatorDef
::
mem_id
()
const
{
return
mem_id_
;
}
void
OperatorDef
::
set_mem_id
(
const
int
mem_id
)
{
set_has_mem_id
();
mem_id_
=
mem_id
;
}
bool
OperatorDef
::
has_mem_id
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
OperatorDef
::
set_has_mem_id
()
{
has_bits_
|=
0x00000004u
;
}
uint32_t
OperatorDef
::
node_id
()
const
{
return
node_id_
;
}
void
OperatorDef
::
set_node_id
(
uint32_t
node_id
)
{
node_id_
=
node_id
;
}
uint32_t
OperatorDef
::
op_id
()
const
{
return
op_id_
;
}
uint32_t
OperatorDef
::
padding
()
const
{
return
padding_
;
}
void
OperatorDef
::
set_padding
(
uint32_t
padding
)
{
padding_
=
padding
;
}
bool
OperatorDef
::
has_mem_id
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
OperatorDef
::
set_has_mem_id
()
{
has_bits_
|=
0x00000004u
;
}
uint32_t
OperatorDef
::
node_id
()
const
{
return
node_id_
;
}
void
OperatorDef
::
set_node_id
(
uint32_t
node_id
)
{
node_id_
=
node_id
;
}
uint32_t
OperatorDef
::
op_id
()
const
{
return
op_id_
;
}
uint32_t
OperatorDef
::
padding
()
const
{
return
padding_
;
}
void
OperatorDef
::
set_padding
(
uint32_t
padding
)
{
padding_
=
padding
;
}
const
std
::
vector
<
NodeInput
>
&
OperatorDef
::
node_input
()
const
{
return
node_input_
;
}
...
...
@@ -294,9 +210,7 @@ const std::vector<int> &OperatorDef::out_max_byte_size() const {
void
OperatorDef
::
add_out_max_byte_size
(
int
value
)
{
out_max_byte_size_
.
push_back
(
value
);
}
const
std
::
vector
<
std
::
string
>
&
OperatorDef
::
input
()
const
{
return
input_
;
}
const
std
::
vector
<
std
::
string
>
&
OperatorDef
::
input
()
const
{
return
input_
;
}
const
std
::
string
&
OperatorDef
::
input
(
int
index
)
const
{
MACE_CHECK
(
0
<=
index
&&
index
<=
input_
.
size
());
return
input_
[
index
];
...
...
@@ -308,16 +222,12 @@ std::string *OperatorDef::add_input() {
void
OperatorDef
::
add_input
(
const
::
std
::
string
&
value
)
{
input_
.
push_back
(
value
);
}
void
OperatorDef
::
add_input
(
::
std
::
string
&&
value
)
{
input_
.
push_back
(
value
);
}
void
OperatorDef
::
add_input
(
::
std
::
string
&&
value
)
{
input_
.
push_back
(
value
);
}
void
OperatorDef
::
set_input
(
const
std
::
vector
<
std
::
string
>
&
value
)
{
input_
.
resize
(
value
.
size
());
std
::
copy
(
value
.
begin
(),
value
.
end
(),
input_
.
begin
());
}
const
std
::
vector
<
std
::
string
>
&
OperatorDef
::
output
()
const
{
return
output_
;
}
const
std
::
vector
<
std
::
string
>
&
OperatorDef
::
output
()
const
{
return
output_
;
}
const
std
::
string
&
OperatorDef
::
output
(
int
index
)
const
{
MACE_CHECK
(
0
<=
index
&&
index
<=
output_
.
size
());
return
output_
[
index
];
...
...
@@ -336,9 +246,7 @@ void OperatorDef::set_output(const std::vector<std::string> &value) {
output_
.
resize
(
value
.
size
());
std
::
copy
(
value
.
begin
(),
value
.
end
(),
output_
.
begin
());
}
const
std
::
vector
<
Argument
>
&
OperatorDef
::
arg
()
const
{
return
arg_
;
}
const
std
::
vector
<
Argument
>
&
OperatorDef
::
arg
()
const
{
return
arg_
;
}
Argument
*
OperatorDef
::
add_arg
()
{
arg_
.
emplace_back
(
Argument
());
return
&
arg_
.
back
();
...
...
@@ -358,18 +266,12 @@ void OperatorDef::set_output_type(const std::vector<DataType> &value) {
}
// MemoryBlock
MemoryBlock
::
MemoryBlock
(
int
mem_id
,
uint32_t
x
,
uint32_t
y
)
:
mem_id_
(
mem_id
),
x_
(
x
),
y_
(
y
)
{}
MemoryBlock
::
MemoryBlock
(
int
mem_id
,
uint32_t
x
,
uint32_t
y
)
:
mem_id_
(
mem_id
),
x_
(
x
),
y_
(
y
)
{}
int
MemoryBlock
::
mem_id
()
const
{
return
mem_id_
;
}
uint32_t
MemoryBlock
::
x
()
const
{
return
x_
;
}
uint32_t
MemoryBlock
::
y
()
const
{
return
y_
;
}
int
MemoryBlock
::
mem_id
()
const
{
return
mem_id_
;
}
uint32_t
MemoryBlock
::
x
()
const
{
return
x_
;
}
uint32_t
MemoryBlock
::
y
()
const
{
return
y_
;
}
// MemoryArena
const
std
::
vector
<
MemoryBlock
>
&
MemoryArena
::
mem_block
()
const
{
...
...
@@ -378,131 +280,69 @@ const std::vector<MemoryBlock> &MemoryArena::mem_block() const {
std
::
vector
<
MemoryBlock
>
&
MemoryArena
::
mutable_mem_block
()
{
return
mem_block_
;
}
int
MemoryArena
::
mem_block_size
()
const
{
return
mem_block_
.
size
();
}
int
MemoryArena
::
mem_block_size
()
const
{
return
mem_block_
.
size
();
}
// InputInfo
const
std
::
string
&
InputInfo
::
name
()
const
{
return
name_
;
}
int32_t
InputInfo
::
node_id
()
const
{
return
node_id_
;
}
int32_t
InputInfo
::
max_byte_size
()
const
{
return
max_byte_size_
;
}
DataType
InputInfo
::
data_type
()
const
{
return
data_type_
;
}
const
std
::
vector
<
int32_t
>
&
InputInfo
::
dims
()
const
{
return
dims_
;
}
const
std
::
string
&
InputInfo
::
name
()
const
{
return
name_
;
}
int32_t
InputInfo
::
node_id
()
const
{
return
node_id_
;
}
int32_t
InputInfo
::
max_byte_size
()
const
{
return
max_byte_size_
;
}
DataType
InputInfo
::
data_type
()
const
{
return
data_type_
;
}
const
std
::
vector
<
int32_t
>
&
InputInfo
::
dims
()
const
{
return
dims_
;
}
// OutputInfo
// Accessors and mutators describing one model output.  The scraped span
// carried the whole set twice (diff old/new copies); one copy of each is
// kept.
const std::string &OutputInfo::name() const { return name_; }
int32_t OutputInfo::node_id() const { return node_id_; }
int32_t OutputInfo::max_byte_size() const { return max_byte_size_; }
DataType OutputInfo::data_type() const { return data_type_; }
void OutputInfo::set_data_type(DataType data_type) { data_type_ = data_type; }
const std::vector<int32_t> &OutputInfo::dims() const { return dims_; }
void OutputInfo::set_dims(const std::vector<int32_t> &dims) { dims_ = dims; }
// NetDef
NetDef
::
NetDef
()
:
has_bits_
(
0
)
{}
const
std
::
string
&
NetDef
::
name
()
const
{
return
name_
;
}
const
std
::
string
&
NetDef
::
name
()
const
{
return
name_
;
}
void
NetDef
::
set_name
(
const
std
::
string
&
value
)
{
set_has_name
();
name_
=
value
;
}
bool
NetDef
::
has_name
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
NetDef
::
set_has_name
()
{
has_bits_
|=
0x00000001u
;
}
const
std
::
string
&
NetDef
::
version
()
const
{
return
version_
;
}
bool
NetDef
::
has_name
()
const
{
return
(
has_bits_
&
0x00000001u
)
!=
0
;
}
void
NetDef
::
set_has_name
()
{
has_bits_
|=
0x00000001u
;
}
const
std
::
string
&
NetDef
::
version
()
const
{
return
version_
;
}
void
NetDef
::
set_version
(
const
std
::
string
&
value
)
{
set_has_version
();
version_
=
value
;
}
bool
NetDef
::
has_version
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
NetDef
::
set_has_version
()
{
has_bits_
|=
0x00000002u
;
}
const
std
::
vector
<
OperatorDef
>
&
NetDef
::
op
()
const
{
return
op_
;
}
bool
NetDef
::
has_version
()
const
{
return
(
has_bits_
&
0x00000002u
)
!=
0
;
}
void
NetDef
::
set_has_version
()
{
has_bits_
|=
0x00000002u
;
}
const
std
::
vector
<
OperatorDef
>
&
NetDef
::
op
()
const
{
return
op_
;
}
OperatorDef
*
NetDef
::
add_op
()
{
op_
.
emplace_back
(
OperatorDef
());
return
&
op_
.
back
();
}
std
::
vector
<
OperatorDef
>
&
NetDef
::
mutable_op
()
{
return
op_
;
}
const
std
::
vector
<
Argument
>
&
NetDef
::
arg
()
const
{
return
arg_
;
}
std
::
vector
<
OperatorDef
>
&
NetDef
::
mutable_op
()
{
return
op_
;
}
const
std
::
vector
<
Argument
>
&
NetDef
::
arg
()
const
{
return
arg_
;
}
Argument
*
NetDef
::
add_arg
()
{
arg_
.
emplace_back
(
Argument
());
return
&
arg_
.
back
();
}
std
::
vector
<
Argument
>
&
NetDef
::
mutable_arg
()
{
return
arg_
;
}
const
std
::
vector
<
ConstTensor
>
&
NetDef
::
tensors
()
const
{
return
tensors_
;
}
std
::
vector
<
ConstTensor
>
&
NetDef
::
mutable_tensors
()
{
return
tensors_
;
}
const
MemoryArena
&
NetDef
::
mem_arena
()
const
{
return
mem_arena_
;
}
std
::
vector
<
Argument
>
&
NetDef
::
mutable_arg
()
{
return
arg_
;
}
const
std
::
vector
<
ConstTensor
>
&
NetDef
::
tensors
()
const
{
return
tensors_
;
}
std
::
vector
<
ConstTensor
>
&
NetDef
::
mutable_tensors
()
{
return
tensors_
;
}
const
MemoryArena
&
NetDef
::
mem_arena
()
const
{
return
mem_arena_
;
}
MemoryArena
&
NetDef
::
mutable_mem_arena
()
{
set_has_mem_arena
();
return
mem_arena_
;
}
bool
NetDef
::
has_mem_arena
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
NetDef
::
set_has_mem_arena
()
{
has_bits_
|=
0x00000004u
;
}
const
std
::
vector
<
InputInfo
>
&
NetDef
::
input_info
()
const
{
return
input_info_
;
}
bool
NetDef
::
has_mem_arena
()
const
{
return
(
has_bits_
&
0x00000004u
)
!=
0
;
}
void
NetDef
::
set_has_mem_arena
()
{
has_bits_
|=
0x00000004u
;
}
const
std
::
vector
<
InputInfo
>
&
NetDef
::
input_info
()
const
{
return
input_info_
;
}
const
std
::
vector
<
OutputInfo
>
&
NetDef
::
output_info
()
const
{
return
output_info_
;
}
std
::
vector
<
OutputInfo
>
&
NetDef
::
mutable_output_info
()
{
return
output_info_
;
}
std
::
vector
<
OutputInfo
>
&
NetDef
::
mutable_output_info
()
{
return
output_info_
;
}
int
NetDef
::
op_size
()
const
{
return
op_
.
size
();
}
int
NetDef
::
op_size
()
const
{
return
op_
.
size
();
}
const
OperatorDef
&
NetDef
::
op
(
const
int
idx
)
const
{
MACE_CHECK
(
0
<=
idx
&&
idx
<
op_size
());
...
...
@@ -510,26 +350,27 @@ const OperatorDef &NetDef::op(const int idx) const {
}
// Mace Engine
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
),
hexagon_controller_
(
nullptr
)
{
ws_
->
CreateTensor
(
"mace_input_node:0"
,
GetDeviceAllocator
(
device_type_
),
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
),
hexagon_controller_
(
nullptr
)
{
ws_
->
CreateTensor
(
"mace_input_node:0"
,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
ws_
->
CreateTensor
(
"mace_output_node:0"
,
GetDeviceAllocator
(
device_type_
),
ws_
->
CreateTensor
(
"mace_output_node:0"
,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
if
(
device_type
==
HEXAGON
)
{
hexagon_controller_
.
reset
(
new
HexagonControlWrapper
());
MACE_CHECK
(
hexagon_controller_
->
Config
(),
"hexagon config error"
);
MACE_CHECK
(
hexagon_controller_
->
Init
(),
"hexagon init error"
);
hexagon_controller_
->
SetDebugLevel
(
static_cast
<
int
>
(
mace
::
logging
::
LogMessage
::
MinVLogLevel
()));
int
dsp_mode
=
ArgumentHelper
::
GetSingleArgument
<
NetDef
,
int
>
(
*
net_def
,
"dsp_mode"
,
0
);
static_cast
<
int
>
(
mace
::
logging
::
LogMessage
::
MinVLogLevel
()));
int
dsp_mode
=
ArgumentHelper
::
GetSingleArgument
<
NetDef
,
int
>
(
*
net_def
,
"dsp_mode"
,
0
);
hexagon_controller_
->
SetGraphMode
(
dsp_mode
);
MACE_CHECK
(
hexagon_controller_
->
SetupGraph
(
*
net_def
),
"hexagon setup graph error"
);
"hexagon setup graph error"
);
if
(
VLOG_IS_ON
(
2
))
{
hexagon_controller_
->
PrintGraph
();
}
...
...
@@ -537,8 +378,8 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
// Init model
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
...
...
@@ -548,18 +389,19 @@ MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type) :
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
const
std
::
vector
<
std
::
string
>
&
input_nodes
,
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
:
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
),
hexagon_controller_
(
nullptr
)
{
const
std
::
vector
<
std
::
string
>
&
output_nodes
)
:
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
),
hexagon_controller_
(
nullptr
)
{
for
(
auto
input_name
:
input_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_input_node_"
,
input_name
,
":0"
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
for
(
auto
output_name
:
output_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_output_node_"
,
output_name
,
":0"
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
if
(
device_type
==
HEXAGON
)
{
hexagon_controller_
.
reset
(
new
HexagonControlWrapper
());
...
...
@@ -567,8 +409,8 @@ MaceEngine::MaceEngine(const NetDef *net_def,
MACE_CHECK
(
hexagon_controller_
->
Init
(),
"hexagon init error"
);
hexagon_controller_
->
SetDebugLevel
(
static_cast
<
int
>
(
mace
::
logging
::
LogMessage
::
MinVLogLevel
()));
int
dsp_mode
=
ArgumentHelper
::
GetSingleArgument
<
NetDef
,
int
>
(
*
net_def
,
"dsp_mode"
,
0
);
int
dsp_mode
=
ArgumentHelper
::
GetSingleArgument
<
NetDef
,
int
>
(
*
net_def
,
"dsp_mode"
,
0
);
hexagon_controller_
->
SetGraphMode
(
dsp_mode
);
MACE_CHECK
(
hexagon_controller_
->
SetupGraph
(
*
net_def
),
"hexagon setup graph error"
);
...
...
@@ -579,14 +421,13 @@ MaceEngine::MaceEngine(const NetDef *net_def,
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
// Init model
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
net_
=
std
::
move
(
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
));
}
}
MaceEngine
::~
MaceEngine
()
{
if
(
device_type_
==
HEXAGON
)
{
...
...
@@ -643,10 +484,11 @@ bool MaceEngine::Run(const float *input,
bool
MaceEngine
::
Run
(
const
std
::
vector
<
MaceInputInfo
>
&
inputs
,
std
::
map
<
std
::
string
,
float
*>
&
outputs
,
RunMetadata
*
run_metadata
)
{
MACE_CHECK
(
device_type_
!=
HEXAGON
,
"HEXAGON not supports multiple outputs now"
);
MACE_CHECK
(
device_type_
!=
HEXAGON
,
"HEXAGON not supports multiple outputs now"
);
for
(
auto
input
:
inputs
)
{
Tensor
*
input_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
name
,
":0"
));
Tensor
*
input_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
name
,
":0"
));
input_tensor
->
Resize
(
input
.
shape
);
{
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
...
...
@@ -658,7 +500,8 @@ bool MaceEngine::Run(const std::vector<MaceInputInfo> &inputs,
LOG
(
FATAL
)
<<
"Net run failed"
;
}
for
(
auto
output
:
outputs
)
{
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
// save output
if
(
output_tensor
!=
nullptr
&&
output
.
second
!=
nullptr
)
{
Tensor
::
MappingGuard
output_guard
(
output_tensor
);
...
...
mace/core/net.cc
浏览文件 @
6b9aafd4
...
...
@@ -3,9 +3,9 @@
//
#include "mace/core/net.h"
#include "mace/utils/utils.h"
#include "mace/utils/timer.h"
#include "mace/utils/memory_logging.h"
#include "mace/utils/timer.h"
#include "mace/utils/utils.h"
namespace
mace
{
...
...
@@ -20,8 +20,7 @@ SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
:
NetBase
(
op_registry
,
net_def
,
ws
,
type
),
device_type_
(
type
)
{
:
NetBase
(
op_registry
,
net_def
,
ws
,
type
),
device_type_
(
type
)
{
MACE_LATENCY_LOGGER
(
1
,
"Constructing SerialNet "
,
net_def
->
name
());
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
...
...
@@ -41,8 +40,8 @@ bool SerialNet::Run(RunMetadata *run_metadata) {
MACE_LATENCY_LOGGER
(
1
,
"Running net"
);
for
(
auto
iter
=
operators_
.
begin
();
iter
!=
operators_
.
end
();
++
iter
)
{
auto
&
op
=
*
iter
;
MACE_LATENCY_LOGGER
(
2
,
"Running operator "
,
op
->
debug_def
().
name
(),
"("
,
op
->
debug_def
().
type
(),
")"
);
MACE_LATENCY_LOGGER
(
2
,
"Running operator "
,
op
->
debug_def
().
name
(),
"("
,
op
->
debug_def
().
type
(),
")"
);
bool
future_wait
=
(
device_type_
==
DeviceType
::
OPENCL
&&
(
run_metadata
!=
nullptr
||
std
::
distance
(
iter
,
operators_
.
end
())
==
1
));
...
...
@@ -99,7 +98,8 @@ std::unique_ptr<NetBase> CreateNet(
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
std
::
unique_ptr
<
NetBase
>
net
(
new
SerialNet
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
std
::
unique_ptr
<
NetBase
>
net
(
new
SerialNet
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
return
net
;
}
...
...
mace/core/operator.h
浏览文件 @
6b9aafd4
...
...
@@ -7,10 +7,10 @@
#include "mace/core/arg_helper.h"
#include "mace/core/future.h"
#include "mace/public/mace.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/public/mace.h"
namespace
mace
{
...
...
@@ -147,7 +147,7 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
class
OperatorRegistry
{
public:
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
RegistryType
;
RegistryType
;
OperatorRegistry
();
~
OperatorRegistry
()
=
default
;
RegistryType
*
registry
()
{
return
&
registry_
;
};
...
...
mace/core/preallocated_pooled_allocator.h
浏览文件 @
6b9aafd4
...
...
@@ -36,6 +36,6 @@ class PreallocatedPooledAllocator {
std
::
unordered_map
<
int
,
std
::
unique_ptr
<
BufferBase
>>
buffers_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#endif
// MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,19 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <vector>
#include <thread>
#include <sys/time.h>
#include <thread>
#include <vector>
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_nn_ops.h"
namespace
{
// Returns wall-clock time in microseconds since the Unix epoch via
// gettimeofday.  Not monotonic — suitable for coarse timing/logging only.
// The scraped span carried this definition twice (diff old/new copies);
// one copy is kept.
inline int64_t NowMicros() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
}
namespace
mace
{
...
...
@@ -63,9 +63,9 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
// const node
std
::
thread
const_thread
([
&
]()
{
std
::
vector
<
hexagon_nn_const_node
>
const_node_list
;
for
(
const
ConstTensor
&
const_tensor
:
net_def
.
tensors
())
{
for
(
const
ConstTensor
&
const_tensor
:
net_def
.
tensors
())
{
std
::
vector
<
int
>
tensor_shape
(
const_tensor
.
dims
().
begin
(),
const_tensor
.
dims
().
end
());
const_tensor
.
dims
().
end
());
while
(
tensor_shape
.
size
()
<
4
)
{
tensor_shape
.
insert
(
tensor_shape
.
begin
(),
1
);
}
...
...
@@ -77,32 +77,32 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
const_node
.
tensor
.
width
=
tensor_shape
[
2
];
const_node
.
tensor
.
depth
=
tensor_shape
[
3
];
if
(
const_tensor
.
data_type
()
==
DataType
::
DT_INT32
&&
const_tensor
.
data_size
()
==
0
)
{
if
(
const_tensor
.
data_type
()
==
DataType
::
DT_INT32
&&
const_tensor
.
data_size
()
==
0
)
{
const_node
.
tensor
.
data
=
NULL
;
const_node
.
tensor
.
dataLen
=
0
;
}
else
{
const_node
.
tensor
.
data
=
const_cast
<
unsigned
char
*>
(
const_tensor
.
data
());
const_node
.
tensor
.
dataLen
=
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
());
const_cast
<
unsigned
char
*>
(
const_tensor
.
data
());
const_node
.
tensor
.
dataLen
=
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
());
}
const_node_list
.
push_back
(
const_node
);
// 255 is magic number: why fastrpc limits sequence length to that?
if
(
const_node_list
.
size
()
>=
250
)
{
MACE_CHECK
(
hexagon_nn_append_const_node_list
(
nn_id_
,
const_node_list
.
data
(),
const_node_list
.
size
())
==
0
,
"append const node error"
);
MACE_CHECK
(
hexagon_nn_append_const_node_list
(
nn_id_
,
const_node_list
.
data
(),
const_node_list
.
size
())
==
0
,
"append const node error"
);
const_node_list
.
clear
();
}
}
if
(
!
const_node_list
.
empty
())
{
MACE_CHECK
(
hexagon_nn_append_const_node_list
(
nn_id_
,
const_node_list
.
data
(),
const_node_list
.
size
())
==
0
,
"append const node error"
);
MACE_CHECK
(
hexagon_nn_append_const_node_list
(
nn_id_
,
const_node_list
.
data
(),
const_node_list
.
size
())
==
0
,
"append const node error"
);
}
const_node_list
.
clear
();
});
...
...
@@ -117,7 +117,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
std
::
vector
<
hexagon_nn_input
>
inputs
;
std
::
vector
<
hexagon_nn_output
>
outputs
;
for
(
const
OperatorDef
&
op
:
net_def
.
op
())
{
for
(
const
OperatorDef
&
op
:
net_def
.
op
())
{
int
op_id
=
op_map
.
GetOpId
(
op
.
type
());
inputs
.
resize
(
op
.
node_input
().
size
());
for
(
size_t
i
=
0
;
i
<
op
.
node_input
().
size
();
++
i
)
{
...
...
@@ -131,9 +131,8 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
cached_inputs
.
push_back
(
inputs
);
cached_outputs
.
push_back
(
outputs
);
hexagon_nn_padding_type
padding_type
=
static_cast
<
hexagon_nn_padding_type
>
(
op
.
padding
());
hexagon_nn_padding_type
padding_type
=
static_cast
<
hexagon_nn_padding_type
>
(
op
.
padding
());
hexagon_nn_op_node
op_node
;
op_node
.
node_id
=
node_id
(
op
.
node_id
());
...
...
@@ -146,8 +145,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
op_node_list
.
push_back
(
op_node
);
if
(
op_node_list
.
size
()
>=
125
)
{
MACE_CHECK
(
hexagon_nn_append_node_list
(
nn_id_
,
op_node_list
.
data
(),
MACE_CHECK
(
hexagon_nn_append_node_list
(
nn_id_
,
op_node_list
.
data
(),
op_node_list
.
size
())
==
0
,
"append node error"
);
op_node_list
.
clear
();
...
...
@@ -157,8 +155,7 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
}
if
(
!
op_node_list
.
empty
())
{
MACE_CHECK
(
hexagon_nn_append_node_list
(
nn_id_
,
op_node_list
.
data
(),
MACE_CHECK
(
hexagon_nn_append_node_list
(
nn_id_
,
op_node_list
.
data
(),
op_node_list
.
size
())
==
0
,
"append node error"
);
}
...
...
@@ -172,10 +169,10 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
// input info
num_inputs_
=
0
;
for
(
const
InputInfo
&
input_info
:
net_def
.
input_info
())
{
for
(
const
InputInfo
&
input_info
:
net_def
.
input_info
())
{
std
::
vector
<
index_t
>
input_shape
;
input_shape
.
insert
(
input_shape
.
begin
(),
input_info
.
dims
().
begin
(),
input_info
.
dims
().
end
());
input_shape
.
insert
(
input_shape
.
begin
(),
input_info
.
dims
().
begin
(),
input_info
.
dims
().
end
());
while
(
input_shape
.
size
()
<
4
)
{
input_shape
.
insert
(
input_shape
.
begin
(),
1
);
}
...
...
@@ -186,10 +183,10 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def) {
// output info
num_outputs_
=
0
;
for
(
const
OutputInfo
&
output_info
:
net_def
.
output_info
())
{
for
(
const
OutputInfo
&
output_info
:
net_def
.
output_info
())
{
std
::
vector
<
index_t
>
output_shape
;
output_shape
.
insert
(
output_shape
.
begin
(),
output_info
.
dims
().
begin
(),
output_info
.
dims
().
end
());
output_shape
.
insert
(
output_shape
.
begin
(),
output_info
.
dims
().
begin
(),
output_info
.
dims
().
end
());
while
(
output_shape
.
size
()
<
4
)
{
output_shape
.
insert
(
output_shape
.
begin
(),
1
);
}
...
...
@@ -218,27 +215,27 @@ bool HexagonControlWrapper::TeardownGraph() {
return
hexagon_nn_teardown
(
nn_id_
)
==
0
;
}
#define PRINT_BUFSIZE (2
*1024*
1024)
#define PRINT_BUFSIZE (2
* 1024 *
1024)
void
HexagonControlWrapper
::
PrintLog
()
{
char
*
buf
;
if
((
buf
=
new
char
[
PRINT_BUFSIZE
])
==
NULL
)
return
;
MACE_CHECK
(
hexagon_nn_getlog
(
nn_id_
,
reinterpret_cast
<
unsigned
char
*>
(
buf
)
,
PRINT_BUFSIZE
)
==
0
,
"print log error"
);
MACE_CHECK
(
hexagon_nn_getlog
(
nn_id_
,
reinterpret_cast
<
unsigned
char
*>
(
buf
),
PRINT_BUFSIZE
)
==
0
,
"print log error"
);
LOG
(
INFO
)
<<
std
::
string
(
buf
);
delete
[]
buf
;
delete
[]
buf
;
}
// Fetches a printable dump of the DSP-side graph for nn_id_ and emits it
// through LOG(INFO).
void HexagonControlWrapper::PrintGraph() {
  LOG(INFO) << "Print Graph";
  // RAII buffer replaces the raw `new char[...]`: the old NULL test after
  // `new` was dead code (operator new throws std::bad_alloc instead of
  // returning NULL), and the raw buffer leaked whenever MACE_CHECK aborted
  // before reaching delete[].
  std::vector<char> buf(PRINT_BUFSIZE);
  MACE_CHECK(hexagon_nn_snpprint(nn_id_,
                                 reinterpret_cast<unsigned char *>(buf.data()),
                                 PRINT_BUFSIZE) == 0,
             "print graph error");
  // The DSP writes a NUL-terminated string into the buffer.
  LOG(INFO) << std::string(buf.data());
}
void
HexagonControlWrapper
::
SetDebugLevel
(
int
level
)
{
...
...
@@ -256,9 +253,9 @@ void HexagonControlWrapper::GetPerfInfo() {
LOG
(
INFO
)
<<
"Get perf info"
;
std
::
vector
<
hexagon_nn_perfinfo
>
perf_info
(
MAX_NODE
);
unsigned
int
n_items
=
0
;
MACE_CHECK
(
hexagon_nn_get_perfinfo
(
nn_id_
,
perf_info
.
data
(),
MAX_NODE
,
&
n_items
)
==
0
,
"get perf info error"
);
MACE_CHECK
(
hexagon_nn_get_perfinfo
(
nn_id_
,
perf_info
.
data
(),
MAX_NODE
,
&
n_items
)
==
0
,
"get perf info error"
);
std
::
unordered_map
<
uint32_t
,
float
>
node_id_counters
;
std
::
unordered_map
<
std
::
string
,
std
::
pair
<
int
,
float
>>
node_type_counters
;
...
...
@@ -269,8 +266,9 @@ void HexagonControlWrapper::GetPerfInfo() {
unsigned
int
node_id
=
perf_info
[
i
].
node_id
;
unsigned
int
node_type_id
=
perf_info
[
i
].
node_type
;
node_id_counters
[
node_id
]
=
((
static_cast
<
uint64_t
>
(
perf_info
[
i
].
counter_hi
)
<<
32
)
+
perf_info
[
i
].
counter_lo
)
*
1.0
f
/
perf_info
[
i
].
executions
;
((
static_cast
<
uint64_t
>
(
perf_info
[
i
].
counter_hi
)
<<
32
)
+
perf_info
[
i
].
counter_lo
)
*
1.0
f
/
perf_info
[
i
].
executions
;
char
node_type_buf
[
MAX_NODE
];
hexagon_nn_op_id_to_name
(
node_type_id
,
node_type_buf
,
MAX_NODE
);
...
...
@@ -288,7 +286,7 @@ void HexagonControlWrapper::GetPerfInfo() {
total_duration
+=
node_id_counters
[
node_id
];
}
for
(
auto
&
node_type_counter
:
node_type_counters
)
{
for
(
auto
&
node_type_counter
:
node_type_counters
)
{
LOG
(
INFO
)
<<
"node type: "
<<
node_type_counter
.
first
<<
", time: "
<<
node_type_counter
.
second
.
first
<<
", duration: "
<<
node_type_counter
.
second
.
second
;
...
...
@@ -312,33 +310,25 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
output_tensor
->
Resize
(
output_shapes_
[
0
]);
std
::
vector
<
uint32_t
>
output_shape
(
4
);
uint32_t
output_bytes
;
int
res
=
hexagon_nn_execute
(
nn_id_
,
input_tensor
.
shape
()[
0
],
input_tensor
.
shape
()[
1
],
input_tensor
.
shape
()[
2
],
input_tensor
.
shape
()[
3
],
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensor
.
raw_data
()),
input_tensor
.
raw_size
(),
&
output_shape
[
0
],
&
output_shape
[
1
],
&
output_shape
[
2
],
&
output_shape
[
3
],
reinterpret_cast
<
unsigned
char
*>
(
output_tensor
->
raw_mutable_data
()),
output_tensor
->
raw_size
(),
&
output_bytes
);
int
res
=
hexagon_nn_execute
(
nn_id_
,
input_tensor
.
shape
()[
0
],
input_tensor
.
shape
()[
1
],
input_tensor
.
shape
()[
2
],
input_tensor
.
shape
()[
3
],
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensor
.
raw_data
()),
input_tensor
.
raw_size
(),
&
output_shape
[
0
],
&
output_shape
[
1
],
&
output_shape
[
2
],
&
output_shape
[
3
],
reinterpret_cast
<
unsigned
char
*>
(
output_tensor
->
raw_mutable_data
()),
output_tensor
->
raw_size
(),
&
output_bytes
);
MACE_CHECK
(
res
==
0
,
"execute error"
);
MACE_ASSERT
(
output_shape
==
output_shapes_
[
0
],
"wrong output shape inferred"
);
MACE_ASSERT
(
output_shape
==
output_shapes_
[
0
],
"wrong output shape inferred"
);
MACE_ASSERT
(
output_bytes
==
output_tensor
->
raw_size
(),
"wrong output bytes inferred."
);
return
res
==
0
;
};
bool
HexagonControlWrapper
::
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
std
::
vector
<
Tensor
>
*
output_tensors
)
{
bool
HexagonControlWrapper
::
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
std
::
vector
<
Tensor
>
*
output_tensors
)
{
LOG
(
INFO
)
<<
"Execute graph new: "
<<
nn_id_
;
int
num_inputs
=
input_tensors
.
size
();
int
num_outputs
=
output_tensors
->
size
();
...
...
@@ -355,7 +345,7 @@ bool HexagonControlWrapper::ExecuteGraphNew(const std::vector<Tensor> &input_ten
inputs
[
i
].
width
=
input_shape
[
2
];
inputs
[
i
].
depth
=
input_shape
[
3
];
inputs
[
i
].
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensors
[
i
].
raw_data
()));
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensors
[
i
].
raw_data
()));
inputs
[
i
].
dataLen
=
input_tensors
[
i
].
raw_size
();
inputs
[
i
].
data_valid_len
=
input_tensors
[
i
].
raw_size
();
inputs
[
i
].
unused
=
0
;
...
...
@@ -365,16 +355,16 @@ bool HexagonControlWrapper::ExecuteGraphNew(const std::vector<Tensor> &input_ten
(
*
output_tensors
)[
i
].
SetDtype
(
output_data_types_
[
i
]);
(
*
output_tensors
)[
i
].
Resize
(
output_shapes_
[
i
]);
outputs
[
i
].
data
=
reinterpret_cast
<
unsigned
char
*>
(
(
*
output_tensors
)[
i
].
raw_mutable_data
());
(
*
output_tensors
)[
i
].
raw_mutable_data
());
outputs
[
i
].
dataLen
=
(
*
output_tensors
)[
i
].
raw_size
();
}
int
res
=
hexagon_nn_execute_new
(
nn_id_
,
inputs
,
num_inputs
,
outputs
,
num_outputs
);
int
res
=
hexagon_nn_execute_new
(
nn_id_
,
inputs
,
num_inputs
,
outputs
,
num_outputs
);
for
(
int
i
=
0
;
i
<
num_outputs
;
++
i
)
{
std
::
vector
<
uint32_t
>
output_shape
{
outputs
[
i
].
batches
,
outputs
[
i
].
height
,
outputs
[
i
].
width
,
outputs
[
i
].
depth
};
outputs
[
i
].
width
,
outputs
[
i
].
depth
};
MACE_ASSERT
(
output_shape
==
output_shapes_
[
i
],
"wrong output shape inferred"
);
MACE_ASSERT
(
outputs
[
i
].
data_valid_len
==
(
*
output_tensors
)[
i
].
raw_size
(),
...
...
@@ -397,9 +387,7 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor,
float
*
min_in_data
=
input_tensors
[
1
].
mutable_data
<
float
>
();
input_tensors
[
2
].
Resize
({
1
,
1
,
1
,
1
});
float
*
max_in_data
=
input_tensors
[
2
].
mutable_data
<
float
>
();
quantizer_
.
Quantize
(
input_tensor
,
&
input_tensors
[
0
],
min_in_data
,
quantizer_
.
Quantize
(
input_tensor
,
&
input_tensors
[
0
],
min_in_data
,
max_in_data
);
if
(
!
ExecuteGraphNew
(
input_tensors
,
&
output_tensors
))
{
return
false
;
...
...
@@ -409,11 +397,9 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor,
const
float
*
min_out_data
=
output_tensors
[
1
].
data
<
float
>
();
const
float
*
max_out_data
=
output_tensors
[
2
].
data
<
float
>
();
quantizer_
.
DeQuantize
(
output_tensors
[
0
],
*
min_out_data
,
*
max_out_data
,
quantizer_
.
DeQuantize
(
output_tensors
[
0
],
*
min_out_data
,
*
max_out_data
,
output_tensor
);
return
true
;
}
}
// namespace mace
}
// namespace mace
mace/core/runtime/hexagon/hexagon_control_wrapper.h
浏览文件 @
6b9aafd4
...
...
@@ -16,16 +16,17 @@ namespace mace {
class
HexagonControlWrapper
{
public:
HexagonControlWrapper
()
{};
HexagonControlWrapper
(){};
int
GetVersion
();
bool
Config
();
bool
Init
();
bool
Finalize
();
bool
SetupGraph
(
const
NetDef
&
net_def
);
bool
SetupGraph
(
const
NetDef
&
net_def
);
bool
ExecuteGraph
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
);
bool
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
bool
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
std
::
vector
<
Tensor
>
*
output_tensors
);
bool
ExecuteGraphPreQuantize
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
);
bool
ExecuteGraphPreQuantize
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
);
bool
TeardownGraph
();
void
PrintLog
();
...
...
@@ -38,9 +39,7 @@ class HexagonControlWrapper {
private:
static
constexpr
int
NODE_ID_OFFSET
=
10000
;
inline
uint32_t
node_id
(
uint32_t
nodeid
)
{
return
NODE_ID_OFFSET
+
nodeid
;
}
inline
uint32_t
node_id
(
uint32_t
nodeid
)
{
return
NODE_ID_OFFSET
+
nodeid
;
}
int
nn_id_
;
Quantizer
quantizer_
;
...
...
@@ -52,9 +51,8 @@ class HexagonControlWrapper {
uint32_t
num_inputs_
;
uint32_t
num_outputs_
;
DISABLE_COPY_AND_ASSIGN
(
HexagonControlWrapper
);
DISABLE_COPY_AND_ASSIGN
(
HexagonControlWrapper
);
};
}
#endif // MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
#endif
// MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_
mace/core/runtime/hexagon/hexagon_controller_dummy.cc
浏览文件 @
6b9aafd4
...
...
@@ -10,31 +10,145 @@ int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
return
0
;
}
int
hexagon_controller_DeInitHexagon
()
{
int
hexagon_controller_DeInitHexagon
()
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_config
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_init
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_debug_level
)(
hexagon_nn_nn_id
id
,
int
level
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_graph_mode
)(
hexagon_nn_nn_id
id
,
int
mode
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_snpprint
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_getlog
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
operation
,
hexagon_nn_padding_type
padding
,
const
hexagon_nn_input
*
inputs
,
int
inputsLen
,
const
hexagon_nn_output
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_op_node
*
ops
,
int
opsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
batches
,
unsigned
int
height
,
unsigned
int
width
,
unsigned
int
depth
,
const
unsigned
char
*
data
,
int
dataLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_const_node
*
consts
,
int
constsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_prepare
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute
)(
hexagon_nn_nn_id
id
,
unsigned
int
batches_in
,
unsigned
int
height_in
,
unsigned
int
width_in
,
unsigned
int
depth_in
,
const
unsigned
char
*
data_in
,
int
data_inLen
,
unsigned
int
*
batches_out
,
unsigned
int
*
height_out
,
unsigned
int
*
width_out
,
unsigned
int
*
depth_out
,
unsigned
char
*
data_out
,
int
data_outLen
,
unsigned
int
*
data_len_out
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_teardown
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_powersave_level
)(
unsigned
int
level
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_get_perfinfo
)(
hexagon_nn_nn_id
id
,
hexagon_nn_perfinfo
*
info_out
,
int
info_outLen
,
unsigned
int
*
n_items
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_reset_perfinfo
)(
hexagon_nn_nn_id
id
,
unsigned
int
event
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_last_execution_cycles
)(
hexagon_nn_nn_id
id
,
unsigned
int
*
cycles_lo
,
unsigned
int
*
cycles_hi
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_version
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_name_to_id
)(
const
char
*
name
,
unsigned
int
*
node_id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_id_to_name
)(
unsigned
int
node_id
,
char
*
name
,
int
nameLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_disable_dcvs
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_GetHexagonBinaryVersion
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_PrintLog
)(
const
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute_new
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_tensordef
*
inputs
,
int
inputsLen
,
hexagon_nn_tensordef
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_config
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_init
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_debug_level
)(
hexagon_nn_nn_id
id
,
int
level
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_graph_mode
)(
hexagon_nn_nn_id
id
,
int
mode
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_snpprint
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_getlog
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
operation
,
hexagon_nn_padding_type
padding
,
const
hexagon_nn_input
*
inputs
,
int
inputsLen
,
const
hexagon_nn_output
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_op_node
*
ops
,
int
opsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
batches
,
unsigned
int
height
,
unsigned
int
width
,
unsigned
int
depth
,
const
unsigned
char
*
data
,
int
dataLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_const_node
*
consts
,
int
constsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_prepare
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute
)(
hexagon_nn_nn_id
id
,
unsigned
int
batches_in
,
unsigned
int
height_in
,
unsigned
int
width_in
,
unsigned
int
depth_in
,
const
unsigned
char
*
data_in
,
int
data_inLen
,
unsigned
int
*
batches_out
,
unsigned
int
*
height_out
,
unsigned
int
*
width_out
,
unsigned
int
*
depth_out
,
unsigned
char
*
data_out
,
int
data_outLen
,
unsigned
int
*
data_len_out
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_teardown
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_powersave_level
)(
unsigned
int
level
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_get_perfinfo
)(
hexagon_nn_nn_id
id
,
hexagon_nn_perfinfo
*
info_out
,
int
info_outLen
,
unsigned
int
*
n_items
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_reset_perfinfo
)(
hexagon_nn_nn_id
id
,
unsigned
int
event
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_last_execution_cycles
)(
hexagon_nn_nn_id
id
,
unsigned
int
*
cycles_lo
,
unsigned
int
*
cycles_hi
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_version
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_name_to_id
)(
const
char
*
name
,
unsigned
int
*
node_id
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_id_to_name
)(
unsigned
int
node_id
,
char
*
name
,
int
nameLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_disable_dcvs
)(
void
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_GetHexagonBinaryVersion
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_PrintLog
)(
const
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute_new
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_tensordef
*
inputs
,
int
inputsLen
,
hexagon_nn_tensordef
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
{
return
0
;
}
mace/core/runtime/hexagon/hexagon_nn.h
浏览文件 @
6b9aafd4
...
...
@@ -2,27 +2,27 @@
#define _HEXAGON_NN_H
#ifndef __QAIC_HEADER
#define __QAIC_HEADER(ff) ff
#endif //__QAIC_HEADER
#endif
//__QAIC_HEADER
#ifndef __QAIC_HEADER_EXPORT
#define __QAIC_HEADER_EXPORT
#endif // __QAIC_HEADER_EXPORT
#endif
// __QAIC_HEADER_EXPORT
#ifndef __QAIC_HEADER_ATTRIBUTE
#define __QAIC_HEADER_ATTRIBUTE
#endif // __QAIC_HEADER_ATTRIBUTE
#endif
// __QAIC_HEADER_ATTRIBUTE
#ifndef __QAIC_IMPL
#define __QAIC_IMPL(ff) ff
#endif //__QAIC_IMPL
#endif
//__QAIC_IMPL
#ifndef __QAIC_IMPL_EXPORT
#define __QAIC_IMPL_EXPORT
#endif // __QAIC_IMPL_EXPORT
#endif
// __QAIC_IMPL_EXPORT
#ifndef __QAIC_IMPL_ATTRIBUTE
#define __QAIC_IMPL_ATTRIBUTE
#endif // __QAIC_IMPL_ATTRIBUTE
#endif
// __QAIC_IMPL_ATTRIBUTE
#ifdef __cplusplus
extern
"C"
{
#endif
...
...
@@ -30,92 +30,160 @@ extern "C" {
#define __QAIC_STRING1_OBJECT_DEFINED__
#define __STRING1_OBJECT__
typedef
struct
_cstring1_s
{
char
*
data
;
int
dataLen
;
char
*
data
;
int
dataLen
;
}
_cstring1_t
;
#endif
/* __QAIC_STRING1_OBJECT_DEFINED__ */
typedef
struct
hexagon_nn_input
hexagon_nn_input
;
struct
hexagon_nn_input
{
unsigned
int
src_id
;
unsigned
int
output_idx
;
unsigned
int
src_id
;
unsigned
int
output_idx
;
};
typedef
struct
hexagon_nn_output
hexagon_nn_output
;
struct
hexagon_nn_output
{
unsigned
int
max_size
;
unsigned
int
unused
;
unsigned
int
max_size
;
unsigned
int
unused
;
};
typedef
struct
hexagon_nn_perfinfo
hexagon_nn_perfinfo
;
struct
hexagon_nn_perfinfo
{
unsigned
int
node_id
;
unsigned
int
node_type
;
unsigned
int
executions
;
unsigned
int
unused
;
unsigned
int
counter_lo
;
unsigned
int
counter_hi
;
unsigned
int
node_id
;
unsigned
int
node_type
;
unsigned
int
executions
;
unsigned
int
unused
;
unsigned
int
counter_lo
;
unsigned
int
counter_hi
;
};
typedef
int
hexagon_nn_nn_id
;
enum
hexagon_nn_padding_type
{
NN_PAD_NA
,
NN_PAD_SAME
,
NN_PAD_VALID
,
NN_PAD_MIRROR_REFLECT
,
NN_PAD_MIRROR_SYMMETRIC
,
NN_PAD_SAME_CAFFE
,
_32BIT_PLACEHOLDER_hexagon_nn_padding_type
=
0x7fffffff
NN_PAD_NA
,
NN_PAD_SAME
,
NN_PAD_VALID
,
NN_PAD_MIRROR_REFLECT
,
NN_PAD_MIRROR_SYMMETRIC
,
NN_PAD_SAME_CAFFE
,
_32BIT_PLACEHOLDER_hexagon_nn_padding_type
=
0x7fffffff
};
typedef
enum
hexagon_nn_padding_type
hexagon_nn_padding_type
;
typedef
struct
hexagon_nn_tensordef
hexagon_nn_tensordef
;
struct
hexagon_nn_tensordef
{
unsigned
int
batches
;
unsigned
int
height
;
unsigned
int
width
;
unsigned
int
depth
;
unsigned
char
*
data
;
int
dataLen
;
unsigned
int
data_valid_len
;
unsigned
int
unused
;
unsigned
int
batches
;
unsigned
int
height
;
unsigned
int
width
;
unsigned
int
depth
;
unsigned
char
*
data
;
int
dataLen
;
unsigned
int
data_valid_len
;
unsigned
int
unused
;
};
typedef
struct
hexagon_nn_op_node
hexagon_nn_op_node
;
struct
hexagon_nn_op_node
{
unsigned
int
node_id
;
unsigned
int
operation
;
hexagon_nn_padding_type
padding
;
hexagon_nn_input
*
inputs
;
int
inputsLen
;
hexagon_nn_output
*
outputs
;
int
outputsLen
;
unsigned
int
node_id
;
unsigned
int
operation
;
hexagon_nn_padding_type
padding
;
hexagon_nn_input
*
inputs
;
int
inputsLen
;
hexagon_nn_output
*
outputs
;
int
outputsLen
;
};
typedef
struct
hexagon_nn_const_node
hexagon_nn_const_node
;
struct
hexagon_nn_const_node
{
unsigned
int
node_id
;
hexagon_nn_tensordef
tensor
;
unsigned
int
node_id
;
hexagon_nn_tensordef
tensor
;
};
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_config
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_init
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_debug_level
)(
hexagon_nn_nn_id
id
,
int
level
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_graph_mode
)(
hexagon_nn_nn_id
id
,
int
mode
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_snpprint
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_getlog
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
operation
,
hexagon_nn_padding_type
padding
,
const
hexagon_nn_input
*
inputs
,
int
inputsLen
,
const
hexagon_nn_output
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_op_node
*
ops
,
int
opsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
batches
,
unsigned
int
height
,
unsigned
int
width
,
unsigned
int
depth
,
const
unsigned
char
*
data
,
int
dataLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_const_node
*
consts
,
int
constsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_prepare
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute
)(
hexagon_nn_nn_id
id
,
unsigned
int
batches_in
,
unsigned
int
height_in
,
unsigned
int
width_in
,
unsigned
int
depth_in
,
const
unsigned
char
*
data_in
,
int
data_inLen
,
unsigned
int
*
batches_out
,
unsigned
int
*
height_out
,
unsigned
int
*
width_out
,
unsigned
int
*
depth_out
,
unsigned
char
*
data_out
,
int
data_outLen
,
unsigned
int
*
data_len_out
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_teardown
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_powersave_level
)(
unsigned
int
level
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_get_perfinfo
)(
hexagon_nn_nn_id
id
,
hexagon_nn_perfinfo
*
info_out
,
int
info_outLen
,
unsigned
int
*
n_items
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_reset_perfinfo
)(
hexagon_nn_nn_id
id
,
unsigned
int
event
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_last_execution_cycles
)(
hexagon_nn_nn_id
id
,
unsigned
int
*
cycles_lo
,
unsigned
int
*
cycles_hi
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_version
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_name_to_id
)(
const
char
*
name
,
unsigned
int
*
node_id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_id_to_name
)(
unsigned
int
node_id
,
char
*
name
,
int
nameLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_disable_dcvs
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_GetHexagonBinaryVersion
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_PrintLog
)(
const
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute_new
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_tensordef
*
inputs
,
int
inputsLen
,
hexagon_nn_tensordef
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_config
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_init
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_debug_level
)(
hexagon_nn_nn_id
id
,
int
level
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_graph_mode
)(
hexagon_nn_nn_id
id
,
int
mode
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_snpprint
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_getlog
)(
hexagon_nn_nn_id
id
,
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
operation
,
hexagon_nn_padding_type
padding
,
const
hexagon_nn_input
*
inputs
,
int
inputsLen
,
const
hexagon_nn_output
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_op_node
*
ops
,
int
opsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node
)(
hexagon_nn_nn_id
id
,
unsigned
int
node_id
,
unsigned
int
batches
,
unsigned
int
height
,
unsigned
int
width
,
unsigned
int
depth
,
const
unsigned
char
*
data
,
int
dataLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_append_const_node_list
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_const_node
*
consts
,
int
constsLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_prepare
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute
)(
hexagon_nn_nn_id
id
,
unsigned
int
batches_in
,
unsigned
int
height_in
,
unsigned
int
width_in
,
unsigned
int
depth_in
,
const
unsigned
char
*
data_in
,
int
data_inLen
,
unsigned
int
*
batches_out
,
unsigned
int
*
height_out
,
unsigned
int
*
width_out
,
unsigned
int
*
depth_out
,
unsigned
char
*
data_out
,
int
data_outLen
,
unsigned
int
*
data_len_out
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_teardown
)(
hexagon_nn_nn_id
id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_set_powersave_level
)(
unsigned
int
level
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_get_perfinfo
)(
hexagon_nn_nn_id
id
,
hexagon_nn_perfinfo
*
info_out
,
int
info_outLen
,
unsigned
int
*
n_items
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_reset_perfinfo
)(
hexagon_nn_nn_id
id
,
unsigned
int
event
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_last_execution_cycles
)(
hexagon_nn_nn_id
id
,
unsigned
int
*
cycles_lo
,
unsigned
int
*
cycles_hi
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_version
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_name_to_id
)(
const
char
*
name
,
unsigned
int
*
node_id
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_op_id_to_name
)(
unsigned
int
node_id
,
char
*
name
,
int
nameLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_disable_dcvs
)(
void
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_GetHexagonBinaryVersion
)(
int
*
ver
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_PrintLog
)(
const
unsigned
char
*
buf
,
int
bufLen
)
__QAIC_HEADER_ATTRIBUTE
;
__QAIC_HEADER_EXPORT
int
__QAIC_HEADER
(
hexagon_nn_execute_new
)(
hexagon_nn_nn_id
id
,
const
hexagon_nn_tensordef
*
inputs
,
int
inputsLen
,
hexagon_nn_tensordef
*
outputs
,
int
outputsLen
)
__QAIC_HEADER_ATTRIBUTE
;
#ifdef __cplusplus
}
#endif
#endif //_HEXAGON_NN_H
#endif
//_HEXAGON_NN_H
mace/core/runtime/hexagon/hexagon_nn_ops.h
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#ifndef LIBMACE_HEXAGON_NN_OPS_H
#define LIBMACE_HEXAGON_NN_OPS_H
#include "mace/utils/logging.h"
#include <unordered_map>
#include "mace/utils/logging.h"
namespace
mace
{
...
...
@@ -24,8 +24,7 @@ typedef enum op_type_enum {
class
OpMap
{
public:
void
Init
()
{
#define DEF_OP(NAME) \
op_map_[#NAME] = OP_##NAME;
#define DEF_OP(NAME) op_map_[#NAME] = OP_##NAME;
#include "mace/core/runtime/hexagon/ops.h"
...
...
@@ -40,9 +39,10 @@ class OpMap {
return
OP_INVALID
;
}
}
private:
std
::
unordered_map
<
std
::
string
,
int
>
op_map_
;
};
}
// namespace mace
}
// namespace mace
#endif // LIBMACE_HEXAGON_NN_OPS_H
#endif
// LIBMACE_HEXAGON_NN_OPS_H
mace/core/runtime/hexagon/ops.h
浏览文件 @
6b9aafd4
...
...
@@ -178,4 +178,3 @@ DEF_OP(QuantizedBiasAdd_8p8to8)
#undef __SELF_DEF_OP_WREF
#undef DEF_OP_WREF
#endif
mace/core/runtime/hexagon/quantize.cc
浏览文件 @
6b9aafd4
...
...
@@ -29,16 +29,16 @@ void Quantizer::Quantize(const Tensor &in_tensor,
float
*
max_out
)
{
float
stepsize
;
float
recip_stepsize
;
QuantizeAdjustRange
(
min_in
,
max_in
,
min_out
,
max_out
,
&
stepsize
,
&
recip_stepsize
);
QuantizeAdjustRange
(
min_in
,
max_in
,
min_out
,
max_out
,
&
stepsize
,
&
recip_stepsize
);
const
float
*
in
=
in_tensor
.
data
<
float
>
();
uint8_t
*
out
=
out_tensor
->
mutable_data
<
uint8_t
>
();
for
(
int
i
=
0
;
i
<
in_tensor
.
size
();
i
++
)
{
const
float
inval
=
in
[
i
];
float
ival
=
static_cast
<
uint8_t
>
((
inval
-
*
min_out
)
*
recip_stepsize
+
0.5
f
);
float
ival
=
static_cast
<
uint8_t
>
((
inval
-
*
min_out
)
*
recip_stepsize
+
0.5
f
);
if
(
ival
<
0
)
ival
=
0
;
if
(
ival
>
255
)
ival
=
255
;
out
[
i
]
=
static_cast
<
uint8_t
>
(
ival
);
...
...
@@ -93,4 +93,4 @@ void Quantizer::DeQuantize(const Tensor &in_tensor,
}
}
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/core/runtime/hexagon/quantize.h
浏览文件 @
6b9aafd4
...
...
@@ -16,13 +16,17 @@ class Quantizer {
void
Quantize
(
const
Tensor
&
in_tensor
,
Tensor
*
out_tensor
,
float
*
min_out
,
float
*
max_out
);
float
*
min_out
,
float
*
max_out
);
void
Quantize
(
const
Tensor
&
in_tensor
,
const
float
min_in
,
const
float
max_in
,
const
float
min_in
,
const
float
max_in
,
Tensor
*
out_tensor
,
float
*
min_out
,
float
*
max_out
);
float
*
min_out
,
float
*
max_out
);
void
DeQuantize
(
const
Tensor
&
in_tensor
,
const
float
min_in
,
const
float
max_in
,
const
float
min_in
,
const
float
max_in
,
Tensor
*
out_tensor
);
private:
...
...
@@ -33,9 +37,9 @@ class Quantizer {
float
*
stepsize
,
float
*
recip_stepsize
);
DISABLE_COPY_AND_ASSIGN
(
Quantizer
);
DISABLE_COPY_AND_ASSIGN
(
Quantizer
);
};
}
// mace
}
// mace
#endif // MACE_DSP_UTIL_QUANTIZE_H_
#endif
// MACE_DSP_UTIL_QUANTIZE_H_
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
...
...
@@ -29,7 +29,6 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
return
0
;
}
}
}
OpenCLAllocator
::
OpenCLAllocator
()
{}
...
...
@@ -49,17 +48,16 @@ void *OpenCLAllocator::New(size_t nbytes) const {
void
*
OpenCLAllocator
::
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
)
const
{
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Image shape's size must equal 2"
;
VLOG
(
3
)
<<
"Allocate OpenCL image: "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
];
VLOG
(
3
)
<<
"Allocate OpenCL image: "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
];
cl
::
ImageFormat
img_format
(
CL_RGBA
,
DataTypeToCLChannelType
(
dt
));
cl_int
error
;
cl
::
Image2D
*
cl_image
=
new
cl
::
Image2D
(
OpenCLRuntime
::
Global
()
->
context
(),
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
img_format
,
image_shape
[
0
],
image_shape
[
1
],
0
,
nullptr
,
&
error
);
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
img_format
,
image_shape
[
0
],
image_shape
[
1
],
0
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
error
<<
" with image shape: ["
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
]
<<
"]"
;
...
...
@@ -89,8 +87,8 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
// TODO(heliangliang) Non-blocking call
cl_int
error
;
void
*
mapped_ptr
=
queue
.
enqueueMapBuffer
(
*
cl_buffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
offset
,
nbytes
,
nullptr
,
nullptr
,
&
error
);
queue
.
enqueueMapBuffer
(
*
cl_buffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
offset
,
nbytes
,
nullptr
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
return
mapped_ptr
;
}
...
...
@@ -106,13 +104,10 @@ void *OpenCLAllocator::MapImage(void *buffer,
mapped_image_pitch
->
resize
(
2
);
cl_int
error
;
void
*
mapped_ptr
=
OpenCLRuntime
::
Global
()
->
command_queue
().
enqueueMapImage
(
*
cl_image
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
origin
,
region
,
mapped_image_pitch
->
data
(),
mapped_image_pitch
->
data
()
+
1
,
nullptr
,
nullptr
,
&
error
);
void
*
mapped_ptr
=
OpenCLRuntime
::
Global
()
->
command_queue
().
enqueueMapImage
(
*
cl_image
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
origin
,
region
,
mapped_image_pitch
->
data
(),
mapped_image_pitch
->
data
()
+
1
,
nullptr
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
error
;
return
mapped_ptr
;
...
...
mace/core/runtime/opencl/opencl_development.cc
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#include <vector>
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/utils/utils.h"
#include "mace/utils/logging.h"
#include "mace/utils/utils.h"
namespace
mace
{
...
...
@@ -16,7 +16,8 @@ bool GetSourceOrBinaryProgram(const std::string &program_name,
cl
::
Device
&
device
,
cl
::
Program
*
program
,
bool
*
is_binary
)
{
extern
const
std
::
map
<
std
::
string
,
std
::
vector
<
unsigned
char
>>
kEncryptedProgramMap
;
extern
const
std
::
map
<
std
::
string
,
std
::
vector
<
unsigned
char
>>
kEncryptedProgramMap
;
*
is_binary
=
false
;
auto
it_source
=
kEncryptedProgramMap
.
find
(
program_name
);
if
(
it_source
==
kEncryptedProgramMap
.
end
())
{
...
...
mace/core/runtime/opencl/opencl_production.cc
浏览文件 @
6b9aafd4
...
...
@@ -14,7 +14,8 @@ bool GetSourceOrBinaryProgram(const std::string &program_name,
cl
::
Device
&
device
,
cl
::
Program
*
program
,
bool
*
is_binary
)
{
extern
const
std
::
map
<
std
::
string
,
std
::
vector
<
unsigned
char
>>
kCompiledProgramMap
;
extern
const
std
::
map
<
std
::
string
,
std
::
vector
<
unsigned
char
>>
kCompiledProgramMap
;
*
is_binary
=
true
;
auto
it_binary
=
kCompiledProgramMap
.
find
(
binary_file_name_prefix
);
if
(
it_binary
==
kCompiledProgramMap
.
end
())
{
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
6b9aafd4
...
...
@@ -48,11 +48,9 @@ double OpenCLProfilingTimer::ElapsedMicros() {
return
(
stop_nanos_
-
start_nanos_
)
/
1000.0
;
}
double
OpenCLProfilingTimer
::
AccumulatedMicros
()
{
return
accumulated_micros_
;
}
double
OpenCLProfilingTimer
::
AccumulatedMicros
()
{
return
accumulated_micros_
;
}
void
OpenCLProfilingTimer
::
AccumulateTiming
(){
void
OpenCLProfilingTimer
::
AccumulateTiming
()
{
StopTiming
();
accumulated_micros_
+=
(
stop_nanos_
-
start_nanos_
)
/
1000.0
;
}
...
...
@@ -116,7 +114,8 @@ OpenCLRuntime::OpenCLRuntime() {
cl
::
CommandQueue
command_queue
(
context
,
gpu_device
,
properties
);
const
char
*
kernel_path
=
getenv
(
"MACE_KERNEL_PATH"
);
this
->
kernel_path_
=
std
::
string
(
kernel_path
==
nullptr
?
""
:
kernel_path
)
+
"/"
;
this
->
kernel_path_
=
std
::
string
(
kernel_path
==
nullptr
?
""
:
kernel_path
)
+
"/"
;
this
->
device_
=
new
cl
::
Device
(
gpu_device
);
this
->
context_
=
new
cl
::
Context
(
context
);
...
...
@@ -163,18 +162,14 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
MACE_CHECK_NOTNULL
(
program
);
std
::
string
binary_file_name_prefix
=
GenerateCLBinaryFilenamePrefix
(
built_program_key
);
GenerateCLBinaryFilenamePrefix
(
built_program_key
);
std
::
vector
<
unsigned
char
>
program_vec
;
bool
is_opencl_binary
;
const
bool
found
=
GetSourceOrBinaryProgram
(
program_name
,
binary_file_name_prefix
,
context
(),
device
(),
program
,
&
is_opencl_binary
);
const
bool
found
=
GetSourceOrBinaryProgram
(
program_name
,
binary_file_name_prefix
,
context
(),
device
(),
program
,
&
is_opencl_binary
);
MACE_CHECK
(
found
,
"Program not found for "
,
is_opencl_binary
?
"binary: "
:
"source: "
,
built_program_key
);
is_opencl_binary
?
"binary: "
:
"source: "
,
built_program_key
);
// Build program
std
::
string
build_options_str
=
...
...
@@ -190,13 +185,13 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
}
LOG
(
FATAL
)
<<
"Build program from "
<<
(
is_opencl_binary
?
"binary: "
:
"source: "
)
<<
built_program_key
<<
" failed: "
<<
ret
;
<<
built_program_key
<<
" failed: "
<<
ret
;
}
if
(
!
is_opencl_binary
)
{
// Write binary if necessary
std
::
string
binary_filename
=
kernel_path_
+
binary_file_name_prefix
+
".bin"
;
std
::
string
binary_filename
=
kernel_path_
+
binary_file_name_prefix
+
".bin"
;
size_t
device_list_size
=
1
;
std
::
unique_ptr
<
size_t
[]
>
program_binary_sizes
(
new
size_t
[
device_list_size
]);
...
...
@@ -240,8 +235,8 @@ cl::Kernel OpenCLRuntime::BuildKernel(
if
(
built_program_it
!=
built_program_map_
.
end
())
{
program
=
built_program_it
->
second
;
}
else
{
this
->
BuildProgram
(
program_name
,
built_program_key
,
build_options_str
,
&
program
);
this
->
BuildProgram
(
program_name
,
built_program_key
,
build_options_str
,
&
program
);
built_program_map_
.
emplace
(
built_program_key
,
program
);
}
return
cl
::
Kernel
(
program
,
kernel_name
.
c_str
());
...
...
@@ -250,9 +245,9 @@ cl::Kernel OpenCLRuntime::BuildKernel(
void
OpenCLRuntime
::
GetCallStats
(
const
cl
::
Event
&
event
,
CallStats
*
stats
)
{
if
(
stats
!=
nullptr
)
{
stats
->
start_micros
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
()
/
1000
;
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
()
/
1000
;
stats
->
end_micros
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
()
/
1000
;
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
()
/
1000
;
}
}
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
6b9aafd4
...
...
@@ -19,7 +19,8 @@ namespace mace {
class
OpenCLProfilingTimer
:
public
Timer
{
public:
explicit
OpenCLProfilingTimer
(
const
cl
::
Event
*
event
)
:
event_
(
event
),
accumulated_micros_
(
0
)
{};
explicit
OpenCLProfilingTimer
(
const
cl
::
Event
*
event
)
:
event_
(
event
),
accumulated_micros_
(
0
){};
void
StartTiming
()
override
;
void
StopTiming
()
override
;
void
AccumulateTiming
()
override
;
...
...
@@ -48,6 +49,7 @@ class OpenCLRuntime {
cl
::
Kernel
BuildKernel
(
const
std
::
string
&
program_name
,
const
std
::
string
&
kernel_name
,
const
std
::
set
<
std
::
string
>
&
build_options
);
private:
OpenCLRuntime
();
~
OpenCLRuntime
();
...
...
mace/core/runtime/opencl/opencl_wrapper.h
浏览文件 @
6b9aafd4
...
...
@@ -7,10 +7,10 @@
namespace
mace
{
// These functions are not thread-safe.
void
LoadOpenCLLibrary
();
void
UnloadOpenCLLibrary
();
// These functions are not thread-safe.
void
LoadOpenCLLibrary
();
void
UnloadOpenCLLibrary
();
}
// namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_WRAPPER_H_
mace/core/tensor.h
浏览文件 @
6b9aafd4
...
...
@@ -65,23 +65,20 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
class
Tensor
{
public:
Tensor
(
Allocator
*
alloc
,
DataType
type
)
:
allocator_
(
alloc
),
dtype_
(
type
),
buffer_
(
nullptr
),
is_buffer_owner_
(
true
),
name_
(
""
)
{};
:
allocator_
(
alloc
),
dtype_
(
type
),
buffer_
(
nullptr
),
is_buffer_owner_
(
true
),
name_
(
""
)
{};
Tensor
(
BufferBase
*
buffer
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_
(
buffer
),
is_buffer_owner_
(
false
),
name_
(
""
)
{}
:
dtype_
(
dtype
),
buffer_
(
buffer
),
is_buffer_owner_
(
false
),
name_
(
""
)
{}
Tensor
(
const
BufferSlice
&
buffer_slice
,
DataType
dtype
)
:
dtype_
(
dtype
),
buffer_slice_
(
buffer_slice
),
is_buffer_owner_
(
false
),
name_
(
""
)
{
:
dtype_
(
dtype
),
buffer_slice_
(
buffer_slice
),
is_buffer_owner_
(
false
),
name_
(
""
)
{
buffer_
=
&
buffer_slice_
;
}
...
...
@@ -102,8 +99,8 @@ class Tensor {
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim
(
unsigned
int
index
)
const
{
MACE_CHECK
(
index
<
shape_
.
size
(),
"Dim out of range: "
,
index
,
" >= "
,
shape_
.
size
());
MACE_CHECK
(
index
<
shape_
.
size
(),
"Dim out of range: "
,
index
,
" >= "
,
shape_
.
size
());
return
shape_
[
index
];
}
...
...
@@ -112,40 +109,35 @@ class Tensor {
std
::
multiplies
<
int64_t
>
());
}
inline
index_t
raw_size
()
const
{
return
size
()
*
SizeOfType
();
}
inline
index_t
raw_size
()
const
{
return
size
()
*
SizeOfType
();
}
inline
bool
has_opencl_image
()
const
{
return
buffer_
!=
nullptr
&&
!
buffer_
->
OnHost
()
&&
typeid
(
*
buffer_
)
==
typeid
(
Image
);
return
buffer_
!=
nullptr
&&
!
buffer_
->
OnHost
()
&&
typeid
(
*
buffer_
)
==
typeid
(
Image
);
}
inline
bool
has_opencl_buffer
()
const
{
return
buffer_
!=
nullptr
&&
!
buffer_
->
OnHost
()
&&
!
has_opencl_image
();
return
buffer_
!=
nullptr
&&
!
buffer_
->
OnHost
()
&&
!
has_opencl_image
();
}
inline
cl
::
Image
*
opencl_image
()
const
{
MACE_CHECK
(
has_opencl_image
(),
"do not have image"
);
return
static_cast
<
cl
::
Image
*>
(
buffer_
->
buffer
());
return
static_cast
<
cl
::
Image
*>
(
buffer_
->
buffer
());
}
inline
cl
::
Buffer
*
opencl_buffer
()
const
{
MACE_CHECK
(
has_opencl_buffer
(),
"do not have opencl buffer"
);
return
static_cast
<
cl
::
Buffer
*>
(
buffer_
->
buffer
());
return
static_cast
<
cl
::
Buffer
*>
(
buffer_
->
buffer
());
}
inline
index_t
buffer_offset
()
const
{
return
buffer_
->
offset
();
}
inline
index_t
buffer_offset
()
const
{
return
buffer_
->
offset
();
}
inline
const
void
*
raw_data
()
const
{
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
raw_data
();
}
template
<
typename
T
>
template
<
typename
T
>
inline
const
T
*
data
()
const
{
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
buffer_
->
data
<
T
>
();
...
...
@@ -156,7 +148,7 @@ class Tensor {
return
buffer_
->
raw_mutable_data
();
}
template
<
typename
T
>
template
<
typename
T
>
inline
T
*
mutable_data
()
{
MACE_CHECK
(
buffer_
!=
nullptr
,
"buffer is null"
);
return
static_cast
<
T
*>
(
buffer_
->
raw_mutable_data
());
...
...
@@ -188,25 +180,17 @@ class Tensor {
is_buffer_owner_
=
true
;
}
else
{
MACE_CHECK
(
has_opencl_image
(),
"Cannot ResizeImage buffer, use Resize."
);
Image
*
image
=
dynamic_cast
<
Image
*>
(
buffer_
);
MACE_CHECK
(
image_shape
[
0
]
<=
image
->
image_shape
()[
0
]
&&
image_shape
[
1
]
<=
image
->
image_shape
()[
1
],
"tensor (source op "
,
name_
,
"): current physical image shape: "
,
image
->
image_shape
()[
0
],
", "
,
image
->
image_shape
()[
1
],
" < logical image shape: "
,
image_shape
[
0
],
", "
,
image_shape
[
1
]);
Image
*
image
=
dynamic_cast
<
Image
*>
(
buffer_
);
MACE_CHECK
(
image_shape
[
0
]
<=
image
->
image_shape
()[
0
]
&&
image_shape
[
1
]
<=
image
->
image_shape
()[
1
],
"tensor (source op "
,
name_
,
"): current physical image shape: "
,
image
->
image_shape
()[
0
],
", "
,
image
->
image_shape
()[
1
],
" < logical image shape: "
,
image_shape
[
0
],
", "
,
image_shape
[
1
]);
}
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
ResizeLike
(
&
other
);
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
ResizeLike
(
&
other
);
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
if
(
other
->
has_opencl_image
())
{
...
...
@@ -229,7 +213,7 @@ class Tensor {
memcpy
(
buffer_
->
raw_mutable_data
(),
src
,
size
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
length
)
{
MACE_CHECK
(
length
==
size
(),
"copy src and dst with different size."
);
CopyBytes
(
static_cast
<
const
void
*>
(
src
),
sizeof
(
T
)
*
length
);
...
...
@@ -248,13 +232,9 @@ class Tensor {
return
type_size
;
}
inline
BufferBase
*
UnderlyingBuffer
()
const
{
return
buffer_
;
}
inline
BufferBase
*
UnderlyingBuffer
()
const
{
return
buffer_
;
}
inline
void
SetSourceOpName
(
const
std
::
string
name
)
{
name_
=
name
;
}
inline
void
SetSourceOpName
(
const
std
::
string
name
)
{
name_
=
name
;
}
inline
void
DebugPrint
()
const
{
using
namespace
numerical_chars
;
...
...
@@ -272,8 +252,9 @@ class Tensor {
}
CASES
(
dtype_
,
(
os
<<
(
this
->
data
<
T
>
()[
i
])
<<
", "
));
}
LOG
(
INFO
)
<<
"Tensor size: ["
<<
dim
(
0
)
<<
", "
<<
dim
(
1
)
<<
", "
<<
dim
(
2
)
<<
", "
<<
dim
(
3
)
<<
"], content:
\n
"
<<
os
.
str
();
LOG
(
INFO
)
<<
"Tensor size: ["
<<
dim
(
0
)
<<
", "
<<
dim
(
1
)
<<
", "
<<
dim
(
2
)
<<
", "
<<
dim
(
3
)
<<
"], content:
\n
"
<<
os
.
str
();
}
class
MappingGuard
{
...
...
@@ -301,20 +282,20 @@ class Tensor {
const
Tensor
*
tensor_
;
std
::
vector
<
size_t
>
mapped_image_pitch_
;
DISABLE_COPY_AND_ASSIGN
(
MappingGuard
);
DISABLE_COPY_AND_ASSIGN
(
MappingGuard
);
};
private:
Allocator
*
allocator_
;
DataType
dtype_
;
std
::
vector
<
index_t
>
shape_
;
std
::
vector
<
size_t
>
image_shape_
;
std
::
vector
<
size_t
>
image_shape_
;
BufferBase
*
buffer_
;
BufferSlice
buffer_slice_
;
bool
is_buffer_owner_
;
std
::
string
name_
;
DISABLE_COPY_AND_ASSIGN
(
Tensor
);
DISABLE_COPY_AND_ASSIGN
(
Tensor
);
};
}
// namespace tensor
...
...
mace/core/testing/test_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -99,9 +99,7 @@ void RestartTiming() {
accum_time
=
0
;
start_time
=
NowMicros
();
}
void
StartTiming
()
{
start_time
=
NowMicros
();
}
void
StartTiming
()
{
start_time
=
NowMicros
();
}
void
StopTiming
()
{
if
(
start_time
!=
0
)
{
accum_time
+=
(
NowMicros
()
-
start_time
);
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
6b9aafd4
...
...
@@ -6,9 +6,9 @@
#ifndef MACE_CORE_TESTING_TEST_BENCHMARK_H_
#define MACE_CORE_TESTING_TEST_BENCHMARK_H_
#include <string>
#include <utility>
#include <vector>
#include <string>
#define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c
#define BENCHMARK(n) \
...
...
mace/core/types.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <map>
#include <cstdint>
#include <map>
#include "mace/core/types.h"
#include "mace/utils/logging.h"
...
...
@@ -30,18 +30,12 @@ bool DataTypeCanUseMemcpy(DataType dt) {
std
::
string
DataTypeToString
(
const
DataType
dt
)
{
static
std
::
map
<
DataType
,
std
::
string
>
dtype_string_map
=
{
{
DT_FLOAT
,
"DT_FLOAT"
},
{
DT_HALF
,
"DT_HALF"
},
{
DT_DOUBLE
,
"DT_DOUBLE"
},
{
DT_UINT8
,
"DT_UINT8"
},
{
DT_INT8
,
"DT_INT8"
},
{
DT_INT32
,
"DT_INT32"
},
{
DT_UINT32
,
"DT_UINT32"
},
{
DT_UINT16
,
"DT_UINT16"
},
{
DT_INT64
,
"DT_INT64"
},
{
DT_BOOL
,
"DT_BOOL"
},
{
DT_STRING
,
"DT_STRING"
}
};
{
DT_FLOAT
,
"DT_FLOAT"
},
{
DT_HALF
,
"DT_HALF"
},
{
DT_DOUBLE
,
"DT_DOUBLE"
},
{
DT_UINT8
,
"DT_UINT8"
},
{
DT_INT8
,
"DT_INT8"
},
{
DT_INT32
,
"DT_INT32"
},
{
DT_UINT32
,
"DT_UINT32"
},
{
DT_UINT16
,
"DT_UINT16"
},
{
DT_INT64
,
"DT_INT64"
},
{
DT_BOOL
,
"DT_BOOL"
},
{
DT_STRING
,
"DT_STRING"
}};
MACE_CHECK
(
dt
!=
DT_INVALID
)
<<
"Not support Invalid data type"
;
return
dtype_string_map
[
dt
];
}
...
...
mace/core/workspace.cc
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#include <string>
#include <vector>
#include "mace/core/workspace.h"
#include "mace/core/arg_helper.h"
#include "mace/core/workspace.h"
#include "mace/utils/timer.h"
namespace
mace
{
...
...
@@ -19,7 +19,7 @@ Tensor *Workspace::CreateTensor(const std::string &name,
}
else
{
VLOG
(
3
)
<<
"Creating Tensor "
<<
name
;
tensor_map_
[
name
]
=
std
::
move
(
std
::
unique_ptr
<
Tensor
>
(
new
Tensor
(
alloc
,
type
)));
std
::
move
(
std
::
unique_ptr
<
Tensor
>
(
new
Tensor
(
alloc
,
type
)));
}
return
GetTensor
(
name
);
}
...
...
@@ -35,7 +35,7 @@ const Tensor *Workspace::GetTensor(const std::string &name) const {
Tensor
*
Workspace
::
GetTensor
(
const
std
::
string
&
name
)
{
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
}
std
::
vector
<
std
::
string
>
Workspace
::
Tensors
()
const
{
...
...
@@ -51,28 +51,28 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
index_t
model_data_size
=
0
;
unsigned
char
*
model_data_ptr
=
nullptr
;
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
if
(
model_data_ptr
==
nullptr
||
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
<
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
{
if
(
model_data_ptr
==
nullptr
||
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
<
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
{
model_data_ptr
=
const_cast
<
unsigned
char
*>
(
const_tensor
.
data
());
}
}
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
model_data_size
=
std
::
max
(
model_data_size
,
static_cast
<
index_t
>
(
(
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
-
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
+
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())));
model_data_size
=
std
::
max
(
model_data_size
,
static_cast
<
index_t
>
((
reinterpret_cast
<
long
long
>
(
const_tensor
.
data
())
-
reinterpret_cast
<
long
long
>
(
model_data_ptr
))
+
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())));
}
VLOG
(
3
)
<<
"Model data size: "
<<
model_data_size
;
if
(
type
==
DeviceType
::
CPU
)
{
tensor_buffer_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_ptr
,
model_data_size
)));
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_ptr
,
model_data_size
)));
}
else
{
tensor_buffer_
=
std
::
move
(
std
::
unique_ptr
<
Buffer
>
(
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_size
)));
new
Buffer
(
GetDeviceAllocator
(
type
),
model_data_size
)));
tensor_buffer_
->
Map
(
nullptr
);
tensor_buffer_
->
Copy
(
model_data_ptr
,
0
,
model_data_size
);
tensor_buffer_
->
UnMap
();
...
...
@@ -81,8 +81,7 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
for
(
auto
&
const_tensor
:
net_def
.
tensors
())
{
MACE_LATENCY_LOGGER
(
2
,
"Load tensor "
,
const_tensor
.
name
());
VLOG
(
3
)
<<
"Tensor name: "
<<
const_tensor
.
name
()
<<
", data type: "
<<
const_tensor
.
data_type
()
<<
", shape: "
<<
", data type: "
<<
const_tensor
.
data_type
()
<<
", shape: "
<<
MakeString
(
std
::
vector
<
index_t
>
(
const_tensor
.
dims
().
begin
(),
const_tensor
.
dims
().
end
()));
std
::
vector
<
index_t
>
dims
;
...
...
@@ -90,14 +89,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
dims
.
push_back
(
d
);
}
index_t
offset
=
(
long
long
)
const_tensor
.
data
()
-
(
long
long
)
model_data_ptr
;
index_t
offset
=
(
long
long
)
const_tensor
.
data
()
-
(
long
long
)
model_data_ptr
;
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
BufferSlice
(
tensor_buffer_
.
get
(),
offset
,
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())),
const_tensor
.
data_type
()));
new
Tensor
(
BufferSlice
(
tensor_buffer_
.
get
(),
offset
,
const_tensor
.
data_size
()
*
GetEnumTypeSize
(
const_tensor
.
data_type
())),
const_tensor
.
data_type
()));
tensor
->
Reshape
(
dims
);
tensor_map_
[
const_tensor
.
name
()]
=
std
::
move
(
tensor
);
...
...
@@ -118,13 +115,11 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
// as GPU have consistent data type for each layer for now.
// As DSP may have different data output type for each op,
// we stick to the same concept.
for
(
auto
&
op
:
net_def
.
op
())
{
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
op
.
has_mem_id
())
{
const
DataType
op_dtype
=
static_cast
<
DataType
>
(
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
op
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
)));
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
op
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
)));
if
(
op_dtype
!=
DataType
::
DT_INVALID
)
{
dtype
=
op_dtype
;
// find first valid data type, break
...
...
@@ -133,22 +128,24 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
}
}
MACE_CHECK
(
dtype
!=
DataType
::
DT_INVALID
,
"data type is invalid."
);
for
(
auto
&
mem_block
:
net_def
.
mem_arena
().
mem_block
())
{
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
for
(
auto
&
mem_block
:
net_def
.
mem_arena
().
mem_block
())
{
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
image_buf
));
}
VLOG
(
3
)
<<
"Preallocate image to tensors"
;
for
(
auto
&
op
:
net_def
.
op
())
{
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
op
.
has_mem_id
())
{
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
preallocated_allocator_
.
GetBuffer
(
op
.
mem_id
()),
dtype
));
std
::
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
preallocated_allocator_
.
GetBuffer
(
op
.
mem_id
()),
dtype
));
tensor
->
SetSourceOpName
(
op
.
name
());
VLOG
(
3
)
<<
"Tensor: "
<<
op
.
name
()
<<
"("
<<
op
.
type
()
<<
")"
<<
"; Mem: "
<<
op
.
mem_id
()
<<
"; Image shape: "
<<
dynamic_cast
<
Image
*>
(
tensor
->
UnderlyingBuffer
())
->
image_shape
()[
0
]
<<
", "
<<
dynamic_cast
<
Image
*>
(
tensor
->
UnderlyingBuffer
())
->
image_shape
()[
1
];
VLOG
(
3
)
<<
"Tensor: "
<<
op
.
name
()
<<
"("
<<
op
.
type
()
<<
")"
<<
"; Mem: "
<<
op
.
mem_id
()
<<
"; Image shape: "
<<
dynamic_cast
<
Image
*>
(
tensor
->
UnderlyingBuffer
())
->
image_shape
()[
0
]
<<
", "
<<
dynamic_cast
<
Image
*>
(
tensor
->
UnderlyingBuffer
())
->
image_shape
()[
1
];
tensor_map_
[
op
.
output
(
0
)]
=
std
::
move
(
tensor
);
}
}
...
...
mace/core/workspace.h
浏览文件 @
6b9aafd4
...
...
@@ -5,9 +5,9 @@
#ifndef MACE_CORE_WORKSPACE_H_
#define MACE_CORE_WORKSPACE_H_
#include "mace/core/preallocated_pooled_allocator.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/core/preallocated_pooled_allocator.h"
namespace
mace
{
...
...
@@ -43,7 +43,7 @@ class Workspace {
PreallocatedPooledAllocator
preallocated_allocator_
;
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
};
}
// namespace mace
...
...
mace/kernels/activation.h
浏览文件 @
6b9aafd4
...
...
@@ -6,9 +6,9 @@
#define MACE_KERNELS_ACTIVATION_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/core/runtime/opencl/cl2_header.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -99,17 +99,15 @@ void PReLUActivation(const T *input_ptr,
output_ptr
[
i
]
=
in
;
}
}
}
template
<
DeviceType
D
,
typename
T
>
class
ActivationFunctor
{
public:
ActivationFunctor
(
ActivationType
type
,
T
relux_max_limit
)
:
activation_
(
type
),
relux_max_limit_
(
relux_max_limit
){}
:
activation_
(
type
),
relux_max_limit_
(
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
alpha
,
Tensor
*
output
,
StatsFuture
*
future
)
{
...
...
@@ -118,9 +116,11 @@ class ActivationFunctor {
if
(
activation_
==
PRELU
)
{
MACE_CHECK_NOTNULL
(
alpha
);
const
T
*
alpha_ptr
=
alpha
->
data
<
T
>
();
PReLUActivation
(
input_ptr
,
output
->
size
(),
input
->
dim
(
3
),
alpha_ptr
,
output_ptr
);
PReLUActivation
(
input_ptr
,
output
->
size
(),
input
->
dim
(
3
),
alpha_ptr
,
output_ptr
);
}
else
{
DoActivation
(
input_ptr
,
output_ptr
,
output
->
size
(),
activation_
,
relux_max_limit_
);
DoActivation
(
input_ptr
,
output_ptr
,
output
->
size
(),
activation_
,
relux_max_limit_
);
}
}
...
...
@@ -131,14 +131,16 @@ class ActivationFunctor {
template
<
>
void
ActivationFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
alpha
,
Tensor
*
output
,
StatsFuture
*
future
);
const
Tensor
*
input
,
const
Tensor
*
alpha
,
Tensor
*
output
,
StatsFuture
*
future
);
template
<
typename
T
>
class
ActivationFunctor
<
DeviceType
::
OPENCL
,
T
>
{
public:
ActivationFunctor
(
ActivationType
type
,
T
relux_max_limit
)
:
activation_
(
type
),
relux_max_limit_
(
relux_max_limit
){}
:
activation_
(
type
),
relux_max_limit_
(
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
alpha
,
...
...
mace/kernels/addn.h
浏览文件 @
6b9aafd4
...
...
@@ -18,7 +18,7 @@ namespace mace {
namespace
kernels
{
namespace
{
constexpr
int
kCostPerGroup
=
1024
;
constexpr
int
kCostPerGroup
=
1024
;
}
// namespace
template
<
DeviceType
D
,
typename
T
>
...
...
mace/kernels/batch_norm.h
浏览文件 @
6b9aafd4
...
...
@@ -10,10 +10,10 @@
#endif
#include "mace/core/future.h"
#include "mace/public/mace.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
#include "mace/public/mace.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -24,7 +24,7 @@ struct BatchNormFunctorBase {
const
float
relux_max_limit
)
:
folded_constant_
(
folded_constant
),
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
){}
relux_max_limit_
(
relux_max_limit
)
{}
const
bool
folded_constant_
;
const
ActivationType
activation_
;
...
...
@@ -36,8 +36,7 @@ struct BatchNormFunctor : BatchNormFunctorBase {
BatchNormFunctor
(
const
bool
folded_constant
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
BatchNormFunctorBase
(
folded_constant
,
activation
,
relux_max_limit
)
{}
:
BatchNormFunctorBase
(
folded_constant
,
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
...
...
@@ -147,8 +146,7 @@ struct BatchNormFunctor<DeviceType::OPENCL, T> : BatchNormFunctorBase {
BatchNormFunctor
(
const
bool
folded_constant
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
BatchNormFunctorBase
(
folded_constant
,
activation
,
relux_max_limit
)
{}
:
BatchNormFunctorBase
(
folded_constant
,
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
scale
,
const
Tensor
*
offset
,
...
...
mace/kernels/bias_add.h
浏览文件 @
6b9aafd4
...
...
@@ -6,9 +6,9 @@
#define MACE_KERNELS_BIAS_ADD_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/core/runtime/opencl/cl2_header.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -32,7 +32,6 @@ struct BiasAddFunctor {
const
T
*
bias_ptr
=
bias
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
#pragma omp parallel for collapse(4)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
...
...
@@ -44,7 +43,6 @@ struct BiasAddFunctor {
}
}
}
}
};
...
...
mace/kernels/buffer_to_image.h
浏览文件 @
6b9aafd4
...
...
@@ -17,10 +17,9 @@ struct BufferToImageFunctorBase {
bool
i2b_
;
};
template
<
DeviceType
D
,
typename
T
>
struct
BufferToImageFunctor
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
template
<
DeviceType
D
,
typename
T
>
struct
BufferToImageFunctor
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
void
operator
()(
Tensor
*
input
,
const
BufferType
type
,
Tensor
*
output
,
...
...
@@ -29,10 +28,9 @@ struct BufferToImageFunctor : BufferToImageFunctorBase{
}
};
template
<
typename
T
>
struct
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
template
<
typename
T
>
struct
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>
:
BufferToImageFunctorBase
{
BufferToImageFunctor
(
bool
i2b
=
false
)
:
BufferToImageFunctorBase
(
i2b
)
{}
void
operator
()(
Tensor
*
input
,
const
BufferType
type
,
Tensor
*
output
,
...
...
mace/kernels/channel_shuffle.h
浏览文件 @
6b9aafd4
...
...
@@ -16,8 +16,10 @@ class ChannelShuffleFunctor {
public:
ChannelShuffleFunctor
(
const
int
group
)
:
group_
(
group
)
{}
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
StatsFuture
*
future
)
{
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
StatsFuture
*
future
)
{
index_t
batch
=
input_shape
[
0
];
index_t
channels
=
input_shape
[
1
];
index_t
height
=
input_shape
[
2
];
...
...
mace/kernels/concat.h
浏览文件 @
6b9aafd4
...
...
@@ -6,23 +6,23 @@
#define MACE_KERNELS_CONCAT_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
namespace
mace
{
namespace
kernels
{
struct
ConcatFunctorBase
{
ConcatFunctorBase
(
const
int32_t
axis
)
:
axis_
(
axis
)
{}
ConcatFunctorBase
(
const
int32_t
axis
)
:
axis_
(
axis
)
{}
int32_t
axis_
;
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ConcatFunctor
:
ConcatFunctorBase
{
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
...
...
@@ -75,14 +75,14 @@ struct ConcatFunctor : ConcatFunctorBase {
}
};
template
<
typename
T
>
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ConcatFunctorBase
{
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
template
<
typename
T
>
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ConcatFunctorBase
{
ConcatFunctor
(
const
int32_t
axis
)
:
ConcatFunctorBase
(
axis
)
{}
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
StatsFuture
*
future
);
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
};
}
// namepsace kernels
...
...
mace/kernels/conv_2d.h
浏览文件 @
6b9aafd4
...
...
@@ -116,9 +116,8 @@ void Conv2dKernelFunc(const T *input_ptr, // batch start
sum
[
sum_idx
]
+=
vaddvq_f32
(
tmp
);
#else
for
(
int
inci
=
0
;
inci
<
inc_tile_size
;
++
inci
)
{
sum
[
sum_idx
]
+=
in
[
in_idx
*
inc_tile_size
+
inci
]
*
weights
[
weights_idx
*
inc_tile_size
+
inci
];
sum
[
sum_idx
]
+=
in
[
in_idx
*
inc_tile_size
+
inci
]
*
weights
[
weights_idx
*
inc_tile_size
+
inci
];
}
#endif
}
...
...
@@ -188,7 +187,7 @@ struct Conv2dFunctorBase {
paddings_
(
paddings
),
dilations_
(
dilations
),
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
){}
relux_max_limit_
(
relux_max_limit
)
{}
const
int
*
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
...
...
@@ -230,8 +229,9 @@ struct Conv2dFunctor : Conv2dFunctorBase {
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
CalcOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
}
output
->
Resize
(
output_shape
);
...
...
mace/kernels/conv_pool_2d_util.cc
浏览文件 @
6b9aafd4
...
...
@@ -145,7 +145,7 @@ void CalcOutputSize(const index_t *input_shape, // NHWC
MACE_CHECK
(
dilations
[
0
]
>
0
&&
dilations
[
1
]
>
0
,
"Invalid dilations, must >= 1"
);
MACE_CHECK
((
dilations
[
0
]
==
1
||
strides
[
0
]
==
1
)
&&
(
dilations
[
1
]
==
1
||
strides
[
1
]
==
1
),
(
dilations
[
1
]
==
1
||
strides
[
1
]
==
1
),
"If dilations > 1, strides should be 1"
);
MACE_CHECK_NOTNULL
(
output_shape
);
MACE_CHECK_NOTNULL
(
padding_size
);
...
...
@@ -159,18 +159,29 @@ void CalcOutputSize(const index_t *input_shape, // NHWC
*/
output_shape
[
0
]
=
input_shape
[
0
];
if
(
round_type
==
FLOOR
)
{
output_shape
[
1
]
=
static_cast
<
index_t
>
(
std
::
floor
(
1.0
*
(
input_shape
[
1
]
+
padding_size
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
strides
[
0
])
+
1
);
output_shape
[
2
]
=
static_cast
<
index_t
>
(
std
::
floor
(
1.0
*
(
input_shape
[
2
]
+
padding_size
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
strides
[
1
])
+
1
);
output_shape
[
1
]
=
static_cast
<
index_t
>
(
std
::
floor
(
1.0
*
(
input_shape
[
1
]
+
padding_size
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
strides
[
0
])
+
1
);
output_shape
[
2
]
=
static_cast
<
index_t
>
(
std
::
floor
(
1.0
*
(
input_shape
[
2
]
+
padding_size
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
strides
[
1
])
+
1
);
}
else
{
output_shape
[
1
]
=
static_cast
<
index_t
>
(
std
::
ceil
(
1.0
*
(
input_shape
[
1
]
+
padding_size
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
strides
[
0
])
+
1
);
output_shape
[
2
]
=
static_cast
<
index_t
>
(
std
::
ceil
(
1.0
*
(
input_shape
[
2
]
+
padding_size
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
strides
[
1
])
+
1
);
output_shape
[
1
]
=
static_cast
<
index_t
>
(
std
::
ceil
(
1.0
*
(
input_shape
[
1
]
+
padding_size
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
strides
[
0
])
+
1
);
output_shape
[
2
]
=
static_cast
<
index_t
>
(
std
::
ceil
(
1.0
*
(
input_shape
[
2
]
+
padding_size
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
strides
[
1
])
+
1
);
}
output_shape
[
3
]
=
filter_shape
[
2
];
}
void
CalPaddingSize
(
const
index_t
*
input_shape
,
// NCHW
...
...
mace/kernels/conv_pool_2d_util.h
浏览文件 @
6b9aafd4
...
...
@@ -15,7 +15,7 @@ enum Padding {
FULL
=
2
,
// Pads with one less than the filter size on both sides
};
enum
RoundType
{
enum
RoundType
{
FLOOR
=
0
,
CEIL
=
1
,
};
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
6b9aafd4
...
...
@@ -10,9 +10,9 @@
#endif
#include "mace/core/future.h"
#include "mace/public/mace.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -247,7 +247,7 @@ struct DepthwiseConv2dFunctorBase {
paddings_
(
paddings
),
dilations_
(
dilations
),
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
){}
relux_max_limit_
(
relux_max_limit
)
{}
const
int
*
strides_
;
// [stride_h, stride_w]
const
Padding
padding_type_
;
...
...
@@ -296,8 +296,9 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
fake_filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
CalcOutputSize
(
input
->
shape
().
data
(),
fake_filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
}
auto
input_shape
=
fake_filter_shape
;
output
->
Resize
(
output_shape
);
...
...
mace/kernels/eltwise.h
浏览文件 @
6b9aafd4
...
...
@@ -5,13 +5,13 @@
#define MACE_KERNELS_ELTWISE_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
enum
EltwiseType
{
enum
EltwiseType
{
PROD
=
0
,
SUM
=
1
,
MAX
=
2
,
...
...
@@ -19,8 +19,7 @@ enum EltwiseType{
};
struct
EltwiseFunctorBase
{
EltwiseFunctorBase
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
EltwiseFunctorBase
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
:
type_
(
type
),
coeff_
(
coeff
)
{}
EltwiseType
type_
;
...
...
@@ -29,8 +28,7 @@ struct EltwiseFunctorBase {
template
<
DeviceType
D
,
typename
T
>
struct
EltwiseFunctor
:
EltwiseFunctorBase
{
EltwiseFunctor
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
EltwiseFunctor
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
:
EltwiseFunctorBase
(
type
,
coeff
)
{}
void
operator
()(
const
Tensor
*
input0
,
...
...
@@ -49,7 +47,7 @@ struct EltwiseFunctor : EltwiseFunctorBase {
switch
(
type_
)
{
case
PROD
:
#pragma omp parallel for
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
output_ptr
[
i
]
=
input0_ptr
[
i
]
*
input1_ptr
[
i
];
}
break
;
...
...
@@ -62,19 +60,20 @@ struct EltwiseFunctor : EltwiseFunctorBase {
}
else
{
#pragma omp parallel for
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
output_ptr
[
i
]
=
coeff_
[
0
]
*
input0_ptr
[
i
]
+
coeff_
[
1
]
*
input1_ptr
[
i
];
output_ptr
[
i
]
=
coeff_
[
0
]
*
input0_ptr
[
i
]
+
coeff_
[
1
]
*
input1_ptr
[
i
];
}
}
break
;
case
MAX
:
#pragma omp parallel for
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
output_ptr
[
i
]
=
std
::
max
<
T
>
(
input0_ptr
[
i
],
input1_ptr
[
i
]);
}
break
;
case
MIN
:
#pragma omp parallel for
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
output_ptr
[
i
]
=
std
::
min
<
T
>
(
input0_ptr
[
i
],
input1_ptr
[
i
]);
}
break
;
...
...
@@ -84,11 +83,9 @@ struct EltwiseFunctor : EltwiseFunctorBase {
}
};
template
<
typename
T
>
struct
EltwiseFunctor
<
DeviceType
::
OPENCL
,
T
>:
EltwiseFunctorBase
{
EltwiseFunctor
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
struct
EltwiseFunctor
<
DeviceType
::
OPENCL
,
T
>
:
EltwiseFunctorBase
{
EltwiseFunctor
(
const
EltwiseType
type
,
const
std
::
vector
<
float
>
&
coeff
)
:
EltwiseFunctorBase
(
type
,
coeff
)
{}
void
operator
()(
const
Tensor
*
input0
,
...
...
mace/kernels/fully_connected.h
浏览文件 @
6b9aafd4
...
...
@@ -6,8 +6,8 @@
#define MACE_KERNELS_FULLY_CONNECTED_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/activation.h"
namespace
mace
{
...
...
@@ -16,25 +16,23 @@ namespace kernels {
struct
FullyConnectedBase
{
FullyConnectedBase
(
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
){}
:
activation_
(
activation
),
relux_max_limit_
(
relux_max_limit
)
{}
const
ActivationType
activation_
;
const
float
relux_max_limit_
;
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
FullyConnectedFunctor
:
FullyConnectedBase
{
FullyConnectedFunctor
(
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
FullyConnectedBase
(
activation
,
relux_max_limit
)
{}
const
float
relux_max_limit
)
:
FullyConnectedBase
(
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
1
,
1
,
weight
->
dim
(
0
)};
output
->
Resize
(
output_shape
);
const
index_t
N
=
output
->
dim
(
0
);
...
...
@@ -70,11 +68,11 @@ struct FullyConnectedFunctor : FullyConnectedBase {
}
};
template
<
typename
T
>
template
<
typename
T
>
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
T
>
:
FullyConnectedBase
{
FullyConnectedFunctor
(
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
FullyConnectedBase
(
activation
,
relux_max_limit
)
{}
const
float
relux_max_limit
)
:
FullyConnectedBase
(
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
weight
,
...
...
mace/kernels/global_avg_pooling.h
浏览文件 @
6b9aafd4
...
...
@@ -39,8 +39,10 @@ struct GlobalAvgPoolingFunctor {
template
<
>
void
GlobalAvgPoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
StatsFuture
*
future
);
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
StatsFuture
*
future
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/matmul.h
浏览文件 @
6b9aafd4
...
...
@@ -6,20 +6,18 @@
#define MACE_KERNELS_MATMUL_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
struct
MatMulFunctor
{
void
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
C
->
Resize
(
c_shape
);
const
index_t
N
=
C
->
dim
(
0
);
...
...
@@ -52,7 +50,6 @@ struct MatMulFunctor {
}
};
template
<
typename
T
>
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
T
>
{
void
operator
()(
const
Tensor
*
A
,
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
6b9aafd4
...
...
@@ -52,7 +52,8 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(
#pragma omp parallel for collapse(2)
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
for
(
index_t
j
=
0
;
j
<
sample_size
;
++
j
)
{
const
float
*
input_sample_ptr
=
input_ptr
+
(
i
*
sample_size
+
j
)
*
channel
;
const
float
*
input_sample_ptr
=
input_ptr
+
(
i
*
sample_size
+
j
)
*
channel
;
float
*
output_sample_ptr
=
output_ptr
+
(
i
*
sample_size
+
j
)
*
channel
;
const
float
*
new_scale_ptr
=
new_scale
.
data
();
const
float
*
new_offset_ptr
=
new_offset
.
data
();
...
...
mace/kernels/neon/conv_2d_neon.cc
浏览文件 @
6b9aafd4
...
...
@@ -50,12 +50,11 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
std
::
vector
<
index_t
>
output_shape_vec
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
kernels
::
CalcPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
dilations_
,
strides_
,
paddings_
,
output_shape_vec
.
data
(),
paddings
.
data
());
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
dilations_
,
strides_
,
paddings_
,
output_shape_vec
.
data
(),
paddings
.
data
());
output
->
Resize
(
output_shape_vec
);
typedef
void
(
*
Conv2dNeonFunction
)(
...
...
@@ -102,8 +101,8 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
auto
output_shape
=
output
->
shape
().
data
();
auto
conv2d_neon_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
input_data
,
input_shape
,
filter_data
,
nullptr
,
bias_data
,
output_data
,
output_shape
);
conv2d_neon_func
(
input_data
,
input_shape
,
filter_data
,
nullptr
,
bias_data
,
output_data
,
output_shape
);
}
}
// namespace kernels
...
...
mace/kernels/neon/conv_2d_neon_3x3.cc
浏览文件 @
6b9aafd4
...
...
@@ -27,10 +27,8 @@ void Conv2dNeonK3x3S1(const float *input, // NCHW
int
input_channels
=
input_shape
[
1
];
int
input_height
=
input_shape
[
2
];
int
input_width
=
input_shape
[
3
];
int
multiplier
=
filter_shape
==
nullptr
?
0
:
filter_shape
[
0
];
int
filter_in_channels
=
filter_shape
==
nullptr
?
input_channels
:
1
;
int
multiplier
=
filter_shape
==
nullptr
?
0
:
filter_shape
[
0
];
int
filter_in_channels
=
filter_shape
==
nullptr
?
input_channels
:
1
;
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
output_batch
;
++
b
)
{
for
(
int
oc
=
0
;
oc
<
output_channels
;
++
oc
)
{
...
...
@@ -230,10 +228,8 @@ void Conv2dNeonK3x3S2(const float *input, // NCHW
int
input_channels
=
input_shape
[
1
];
int
input_height
=
input_shape
[
2
];
int
input_width
=
input_shape
[
3
];
int
multiplier
=
filter_shape
==
nullptr
?
0
:
filter_shape
[
0
];
int
filter_in_channels
=
filter_shape
==
nullptr
?
input_channels
:
1
;
int
multiplier
=
filter_shape
==
nullptr
?
0
:
filter_shape
[
0
];
int
filter_in_channels
=
filter_shape
==
nullptr
?
input_channels
:
1
;
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
output_batch
;
++
b
)
{
...
...
mace/kernels/neon/depthwise_conv_neon.cc
浏览文件 @
6b9aafd4
...
...
@@ -52,9 +52,8 @@ void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(
<<
"filter"
<<
kernel_h
<<
"x"
<<
kernel_w
<<
","
<<
" stride "
<<
strides_
[
0
]
<<
"x"
<<
strides_
[
1
]
<<
" is not implemented yet, using slow version"
;
DepthwiseConv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
filter
,
bias
,
output
,
future
);
DepthwiseConv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
filter
,
bias
,
output
,
future
);
return
;
}
...
...
@@ -73,8 +72,8 @@ void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(
input_shape
=
padded_input
.
shape
().
data
();
}
auto
conv2d_neon_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
input_ptr
,
input_shape
,
filter_ptr
,
filter_shape
,
bias_ptr
,
output_ptr
,
output_shape
);
conv2d_neon_func
(
input_ptr
,
input_shape
,
filter_ptr
,
filter_shape
,
bias_ptr
,
output_
ptr
,
output_
shape
);
}
}
// namespace kernels
...
...
mace/kernels/opencl/activation_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -57,8 +57,7 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
default:
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation_
;
}
kernel_
=
runtime
->
BuildKernel
(
"activation"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"activation"
,
kernel_name
,
built_options
);
int
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
if
(
activation_
==
PRELU
)
{
...
...
@@ -74,8 +73,8 @@ void ActivationFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
static_cast
<
uint32_t
>
(
height
*
batch
)};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
string
tuning_key
=
Concat
(
tuning_key_prefix_
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
Concat
(
tuning_key_prefix_
,
output
->
dim
(
0
),
output
->
dim
(
1
),
output
->
dim
(
2
),
output
->
dim
(
3
));
TuningOrRun3DKernel
(
kernel_
,
tuning_key
,
gws
,
lws
,
future
);
}
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#include "mace/kernels/addn.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -57,31 +57,23 @@ void AddNFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t
idx
=
0
;
for
(
auto
input
:
input_tensors
)
{
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output_tensor
->
opencl_image
()));
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)
};
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)};
const
std
::
vector
<
uint32_t
>
lws
=
{
64
,
16
,
1
};
std
::
stringstream
ss
;
ss
<<
"addn_opencl_kernel_"
<<
output_shape
[
0
]
<<
"_"
<<
output_shape
[
1
]
<<
"_"
<<
output_shape
[
2
]
<<
"_"
<<
output_shape
[
3
];
ss
<<
"addn_opencl_kernel_"
<<
output_shape
[
0
]
<<
"_"
<<
output_shape
[
1
]
<<
"_"
<<
output_shape
[
2
]
<<
"_"
<<
output_shape
[
3
];
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
};
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
AddNFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -60,17 +60,14 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation_
;
}
kernel_
=
runtime
->
BuildKernel
(
"batch_norm"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"batch_norm"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
scale
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
offset
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
offset
->
opencl_image
()));
if
(
!
folded_constant_
)
{
kernel_
.
setArg
(
idx
++
,
*
(
mean
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
mean
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
var
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
epsilon
);
}
...
...
mace/kernels/opencl/bias_add_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -12,11 +12,10 @@ namespace mace {
namespace
kernels
{
template
<
typename
T
>
void
BiasAddFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
void
BiasAddFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
height
=
input
->
dim
(
1
);
const
index_t
width
=
input
->
dim
(
2
);
...
...
@@ -47,10 +46,8 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel_
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
kernel_
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws
[
2
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
],
lws
[
2
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
](
CallStats
*
stats
)
{
...
...
@@ -62,9 +59,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
}
}
template
struct
BiasAddFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
BiasAddFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
BiasAddFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
BiasAddFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
6b9aafd4
...
...
@@ -9,36 +9,33 @@
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
void
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
Tensor
*
buffer
,
const
BufferType
type
,
Tensor
*
image
,
StatsFuture
*
future
)
{
template
<
typename
T
>
void
BufferToImageFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
Tensor
*
buffer
,
const
BufferType
type
,
Tensor
*
image
,
StatsFuture
*
future
)
{
std
::
vector
<
size_t
>
image_shape
;
if
(
!
i2b_
)
{
CalImage2DShape
(
buffer
->
shape
(),
type
,
image_shape
);
if
(
type
==
WINOGRAD_FILTER
)
{
std
::
vector
<
index_t
>
new_shape
=
CalWinogradShape
(
buffer
->
shape
(),
type
);
if
(
type
==
WINOGRAD_FILTER
)
{
std
::
vector
<
index_t
>
new_shape
=
CalWinogradShape
(
buffer
->
shape
(),
type
);
image
->
ResizeImage
(
new_shape
,
image_shape
);
}
else
{
image
->
ResizeImage
(
buffer
->
shape
(),
image_shape
);
}
}
else
{
Image
*
image_buf
=
dynamic_cast
<
Image
*>
(
image
->
UnderlyingBuffer
());
Image
*
image_buf
=
dynamic_cast
<
Image
*>
(
image
->
UnderlyingBuffer
());
image_shape
=
image_buf
->
image_shape
();
buffer
->
Resize
(
image
->
shape
());
}
size_t
gws
[
2
]
=
{
image_shape
[
0
],
image_shape
[
1
]};
size_t
gws
[
2
]
=
{
image_shape
[
0
],
image_shape
[
1
]};
std
::
string
kernel_name
;
switch
(
type
)
{
case
CONV2D_FILTER
:
kernel_name
=
i2b_
?
"filter_image_to_buffer"
:
"filter_buffer_to_image"
;
break
;
case
DW_CONV2D_FILTER
:
kernel_name
=
i2b_
?
"dw_filter_image_to_buffer"
:
"dw_filter_buffer_to_image"
;
kernel_name
=
i2b_
?
"dw_filter_image_to_buffer"
:
"dw_filter_buffer_to_image"
;
break
;
case
IN_OUT_CHANNEL
:
kernel_name
=
i2b_
?
"in_out_image_to_buffer"
:
"in_out_buffer_to_image"
;
...
...
@@ -48,7 +45,8 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
break
;
case
IN_OUT_HEIGHT
:
case
WEIGHT_HEIGHT
:
kernel_name
=
i2b_
?
"in_out_height_image_to_buffer"
:
"in_out_height_buffer_to_image"
;
kernel_name
=
i2b_
?
"in_out_height_image_to_buffer"
:
"in_out_height_buffer_to_image"
;
break
;
case
IN_OUT_WIDTH
:
MACE_CHECK
(
!
i2b_
)
<<
"IN_OUT_WIDTH only support buffer to image now"
;
...
...
@@ -56,7 +54,8 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
break
;
case
WINOGRAD_FILTER
:
gws
[
1
]
/=
16
;
kernel_name
=
i2b_
?
"winograd_filter_image_to_buffer"
:
"winograd_filter_buffer_to_image"
;
kernel_name
=
i2b_
?
"winograd_filter_image_to_buffer"
:
"winograd_filter_buffer_to_image"
;
break
;
}
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
...
...
@@ -66,25 +65,30 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
built_options
.
emplace
(
kernel_name_ss
.
str
());
if
(
buffer
->
dtype
()
==
image
->
dtype
())
{
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
}
else
{
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
}
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
b2f_kernel
=
runtime
->
BuildKernel
(
"buffer_to_image"
,
obfuscated_kernel_name
,
built_options
);
obfuscated_kernel_name
,
built_options
);
uint32_t
idx
=
0
;
b2f_kernel
.
setArg
(
idx
++
,
*
(
buffer
->
opencl_buffer
()));
if
(
!
i2b_
)
{
MACE_CHECK
(
buffer
->
buffer_offset
()
%
GetEnumTypeSize
(
buffer
->
dtype
())
==
0
,
"buffer offset not aligned"
);
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
buffer_offset
()
/
GetEnumTypeSize
(
buffer
->
dtype
())));
MACE_CHECK
(
buffer
->
buffer_offset
()
%
GetEnumTypeSize
(
buffer
->
dtype
())
==
0
,
"buffer offset not aligned"
);
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
buffer_offset
()
/
GetEnumTypeSize
(
buffer
->
dtype
())));
}
if
(
type
==
ARGUMENT
)
{
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
dim
(
0
)));
}
else
if
(
type
==
WEIGHT_HEIGHT
)
{
}
else
if
(
type
==
WEIGHT_HEIGHT
)
{
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
dim
(
0
)));
b2f_kernel
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
buffer
->
dim
(
1
)));
b2f_kernel
.
setArg
(
idx
++
,
1
);
...
...
@@ -97,10 +101,8 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
const
std
::
vector
<
uint32_t
>
lws
=
{
16
,
64
};
cl
::
Event
event
;
cl_int
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
b2f_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
]),
nullptr
,
&
event
);
b2f_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
lws
[
0
],
lws
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
if
(
future
!=
nullptr
)
{
...
...
mace/kernels/opencl/cl/common.h
浏览文件 @
6b9aafd4
...
...
@@ -18,8 +18,8 @@
#define READ_IMAGET CMD_TYPE(read_image, CMD_DATA_TYPE)
#define WRITE_IMAGET CMD_TYPE(write_image, CMD_DATA_TYPE)
__constant
sampler_t
SAMPLER
=
CLK_NORMALIZED_COORDS_FALSE
|
CLK_ADDRESS_CLAMP
|
CLK_FILTER_NEAREST
;
__constant
sampler_t
SAMPLER
=
CLK_NORMALIZED_COORDS_FALSE
|
CLK_ADDRESS_CLAMP
|
CLK_FILTER_NEAREST
;
inline
DATA_TYPE4
do_activation
(
DATA_TYPE4
in
,
#ifdef USE_PRELU
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#include "mace/kernels/concat.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -42,24 +42,23 @@ static void Concat2(cl::Kernel *kernel,
*
kernel
=
runtime
->
BuildKernel
(
"concat"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input0
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input1
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input0
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input1
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input0
->
dim
(
3
)));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
opencl_image
())));
kernel
->
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
opencl_image
())));
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blk
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
channel_blk
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
batch
*
height
),
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"concat_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"concat_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
*
kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
...
...
@@ -97,27 +96,25 @@ static void ConcatN(cl::Kernel *kernel,
index_t
input_channel_blk
=
input
->
dim
(
3
)
/
4
;
chan_blk_offset
+=
input_channel_blk
;
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
input_channel_blk
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
input_channel_blk
),
static_cast
<
uint32_t
>
(
width
),
static_cast
<
uint32_t
>
(
batch
*
height
),
};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"concat_n_opencl_kernel_"
<<
input_channel_blk
<<
"_"
<<
width
<<
"_"
ss
<<
"concat_n_opencl_kernel_"
<<
input_channel_blk
<<
"_"
<<
width
<<
"_"
<<
batch
*
height
;
TuningOrRun3DKernel
(
*
kernel
,
ss
.
str
(),
gws
,
lws
,
future
);
}
}
template
<
typename
T
>
void
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
StatsFuture
*
future
)
{
template
<
typename
T
>
void
ConcatFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
int
inputs_count
=
input_list
.
size
();
MACE_CHECK
(
inputs_count
>=
2
&&
axis_
==
3
)
<<
"Concat opencl kernel only support >=2 elements with axis == 3"
;
<<
"Concat opencl kernel only support >=2 elements with axis == 3"
;
const
Tensor
*
input0
=
input_list
[
0
];
bool
divisible_four
=
input0
->
dim
(
axis_
)
%
4
==
0
;
...
...
@@ -137,8 +134,9 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(const std::vector<const Te
}
output_shape
[
axis_
]
+=
input
->
dim
(
axis_
);
}
MACE_CHECK
(
inputs_count
==
2
||
divisible_four
,
"Dimensions of inputs should be divisible by 4 when inputs_count > 2."
);
MACE_CHECK
(
inputs_count
==
2
||
divisible_four
,
"Dimensions of inputs should be divisible by 4 when inputs_count > 2."
);
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
...
...
@@ -151,17 +149,14 @@ void ConcatFunctor<DeviceType::OPENCL, T>::operator()(const std::vector<const Te
default:
if
(
divisible_four
)
{
ConcatN
(
&
kernel_
,
input_list
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
}
else
{
}
else
{
MACE_NOT_IMPLEMENTED
;
}
}
};
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ConcatFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/conv_2d_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -47,21 +47,21 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
Tensor
*
output
,
StatsFuture
*
future
);
template
<
typename
T
>
template
<
typename
T
>
void
Conv2dFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
typedef
void
(
*
Conv2dOpenclFunction
)(
cl
::
Kernel
*
kernel
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
*
kernel
,
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
padding
,
const
int
*
dilations
,
const
ActivationType
activation
,
const
float
relux_max_limit
,
const
DataType
dt
,
Tensor
*
output
,
StatsFuture
*
future
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dOpenclFunction
selector
[
5
]
=
{
Conv2dOpenclK1x1
,
nullptr
,
Conv2dOpenclK3x3
,
nullptr
,
nullptr
};
static
const
Conv2dOpenclFunction
selector
[
5
]
=
{
Conv2dOpenclK1x1
,
nullptr
,
Conv2dOpenclK3x3
,
nullptr
,
nullptr
};
index_t
kernel_h
=
filter
->
dim
(
0
);
index_t
kernel_w
=
filter
->
dim
(
1
);
...
...
@@ -83,8 +83,9 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
CalcOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
...
...
@@ -94,18 +95,18 @@ void Conv2dFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
if
(
kernel_h
==
kernel_w
&&
kernel_h
<=
5
&&
selector
[
kernel_h
-
1
]
!=
nullptr
)
{
auto
conv2d_func
=
selector
[
kernel_h
-
1
];
conv2d_func
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
conv2d_func
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
}
else
{
Conv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
Conv2dOpencl
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
}
}
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
Conv2dFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
6b9aafd4
...
...
@@ -66,20 +66,15 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
}
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_1x1"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
// FIXME handle flexable data type: half not supported
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input_height
));
...
...
@@ -100,6 +95,5 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
TuningOrRun3DKernel
(
*
kernel
,
tuning_key
,
gws
,
lws
,
future
);
}
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
6b9aafd4
...
...
@@ -61,20 +61,15 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
}
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_3x3"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d_3x3"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
1
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
2
)));
...
...
mace/kernels/opencl/conv_2d_opencl_general.cc
浏览文件 @
6b9aafd4
...
...
@@ -61,20 +61,15 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
}
auto
runtime
=
OpenCLRuntime
::
Global
();
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"conv_2d"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
input
->
dim
(
1
)));
kernel
->
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
input
->
dim
(
2
)));
...
...
mace/kernels/opencl/depthwise_conv_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -34,7 +34,7 @@ void DepthwiseConv2d(cl::Kernel *kernel,
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
if
(
kernel
->
get
()
==
nullptr
)
{
if
(
kernel
->
get
()
==
nullptr
)
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
...
...
@@ -78,18 +78,16 @@ void DepthwiseConv2d(cl::Kernel *kernel,
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation
;
}
*
kernel
=
runtime
->
BuildKernel
(
"depthwise_conv2d"
,
kernel_name
,
built_options
);
*
kernel
=
runtime
->
BuildKernel
(
"depthwise_conv2d"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel
->
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
filter
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel
->
setArg
(
idx
++
,
relux_max_limit
);
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
input_height
));
kernel
->
setArg
(
idx
++
,
static_cast
<
short
>
(
input_width
));
...
...
@@ -154,16 +152,17 @@ void DepthwiseConv2dFunctor<DeviceType::OPENCL, T>::operator()(
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input
->
shape
().
data
(),
fake_filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
CalcOutputSize
(
input
->
shape
().
data
(),
fake_filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
FLOOR
,
output_shape
.
data
());
}
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DepthwiseConv2d
(
&
kernel_
,
input
,
filter
,
bias
,
strides_
[
0
],
paddings
.
data
(),
dilations_
,
activation_
,
relux_max_limit_
,
DataTypeToEnum
<
T
>::
value
,
output
,
future
);
}
...
...
mace/kernels/opencl/eltwise_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -15,7 +15,6 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
const
Tensor
*
input1
,
Tensor
*
output
,
StatsFuture
*
future
)
{
const
index_t
batch
=
input0
->
dim
(
0
);
const
index_t
height
=
input0
->
dim
(
1
);
const
index_t
width
=
input0
->
dim
(
2
);
...
...
@@ -38,10 +37,8 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
kernel_
=
runtime
->
BuildKernel
(
"eltwise"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input0
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input1
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input0
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input1
->
opencl_image
()));
if
(
!
coeff_
.
empty
())
{
kernel_
.
setArg
(
idx
++
,
coeff_
[
0
]);
kernel_
.
setArg
(
idx
++
,
coeff_
[
1
]);
...
...
@@ -49,17 +46,12 @@ void EltwiseFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input0,
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)
};
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
width_pixels
),
static_cast
<
uint32_t
>
(
batch_height_pixels
)};
const
std
::
vector
<
uint32_t
>
lws
=
{
64
,
16
,
1
};
std
::
stringstream
ss
;
ss
<<
"eltwise_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"eltwise_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
...
...
mace/kernels/opencl/fully_connected_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -10,14 +10,13 @@
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
template
<
typename
T
>
void
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
weight
,
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
1
,
1
,
weight
->
dim
(
0
)};
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
...
...
@@ -57,19 +56,16 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
default:
LOG
(
FATAL
)
<<
"Unknown activation type: "
<<
activation_
;
}
kernel_
=
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"fully_connected"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
weight
->
opencl_image
()));
if
(
bias
!=
nullptr
)
{
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
bias
->
opencl_image
()));
}
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input
->
dim
(
3
)));
...
...
@@ -78,25 +74,18 @@ void FullyConnectedFunctor<DeviceType::OPENCL, T>::operator()(
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
static_cast
<
uint32_t
>
(
batch
),
static_cast
<
uint32_t
>
(
output_blocks
),
};
const
std
::
vector
<
uint32_t
>
lws
=
{
16
,
64
,
1
};
std
::
stringstream
ss
;
ss
<<
"fc_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"fc_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
};
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
FullyConnectedFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/helper.cc
浏览文件 @
6b9aafd4
...
...
@@ -3,8 +3,8 @@
//
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -28,8 +28,9 @@ void CalConv2dFilterImageShape(const std::vector<index_t> &shape, /* HWOI */
}
// [H * W * M, (Ic + 3) / 4]
void
CalDepthwiseConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWIM */
std
::
vector
<
size_t
>
&
image_shape
)
{
void
CalDepthwiseConv2dFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* HWIM */
std
::
vector
<
size_t
>
&
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
[
0
]
=
shape
[
0
]
*
shape
[
1
]
*
shape
[
3
];
...
...
@@ -47,8 +48,9 @@ void CalArgImageShape(const std::vector<index_t> &shape,
// Only support 3x3 now
// [ (Ic + 3) / 4, 16 * Oc]
void
CalWinogradFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* Oc, Ic, H, W*/
std
::
vector
<
size_t
>
&
image_shape
)
{
void
CalWinogradFilterImageShape
(
const
std
::
vector
<
index_t
>
&
shape
,
/* Oc, Ic, H, W*/
std
::
vector
<
size_t
>
&
image_shape
)
{
MACE_CHECK
(
shape
.
size
()
==
4
);
image_shape
.
resize
(
2
);
image_shape
[
0
]
=
RoundUpDiv4
(
shape
[
1
]);
...
...
@@ -115,19 +117,16 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
}
}
std
::
vector
<
index_t
>
CalWinogradShape
(
const
std
::
vector
<
index_t
>
&
shape
,
const
BufferType
type
)
{
if
(
type
==
WINOGRAD_FILTER
)
{
return
{
16
,
shape
[
0
],
shape
[
1
],
1
};
}
else
if
(
type
==
IN_OUT_HEIGHT
)
{
index_t
out_width
=
shape
[
0
]
*
((
shape
[
1
]
-
1
)
/
2
)
*
((
shape
[
2
]
-
1
)
/
2
);
}
else
if
(
type
==
IN_OUT_HEIGHT
)
{
index_t
out_width
=
shape
[
0
]
*
((
shape
[
1
]
-
1
)
/
2
)
*
((
shape
[
2
]
-
1
)
/
2
);
return
{
16
,
shape
[
3
],
out_width
,
1
};
}
else
{
LOG
(
FATAL
)
<<
"Mace not supported yet."
;
return
std
::
vector
<
index_t
>
();
return
std
::
vector
<
index_t
>
();
}
}
...
...
@@ -188,10 +187,10 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
std
::
vector
<
uint32_t
>
local_ws
(
3
,
0
);
local_ws
[
0
]
=
std
::
min
<
uint32_t
>
(
gws
[
0
],
kwg_size
);
local_ws
[
1
]
=
std
::
min
<
uint32_t
>
(
gws
[
1
],
kwg_size
/
local_ws
[
0
]);
local_ws
[
2
]
=
std
::
min
<
uint32_t
>
(
gws
[
2
],
kwg_size
/
(
local_ws
[
0
]
*
local_ws
[
1
]));
local_ws
[
2
]
=
std
::
min
<
uint32_t
>
(
gws
[
2
],
kwg_size
/
(
local_ws
[
0
]
*
local_ws
[
1
]));
return
{
// TODO tuning these magic numbers
// TODO tuning these magic numbers
{
local_ws
[
0
],
local_ws
[
1
],
local_ws
[
2
],
1
},
{
kwg_size
/
16
,
4
,
4
,
1
},
{
kwg_size
/
32
,
4
,
8
,
1
},
...
...
@@ -217,20 +216,20 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
};
};
cl
::
Event
event
;
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>
&
params
,
Timer
*
timer
,
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>
&
params
,
Timer
*
timer
,
std
::
vector
<
uint32_t
>
*
tuning_result
)
->
cl_int
{
MACE_CHECK
(
params
.
size
()
==
4
)
<<
"Tuning parameters of 3D kernel must be 4D"
;
MACE_CHECK
(
params
.
size
()
==
4
)
<<
"Tuning parameters of 3D kernel must be 4D"
;
cl_int
error
=
CL_SUCCESS
;
if
(
timer
==
nullptr
)
{
uint32_t
num_blocks
=
params
[
3
];
const
uint32_t
block_size
=
gws
[
2
]
/
num_blocks
;
if
(
gws
[
2
]
%
num_blocks
>
0
)
num_blocks
++
;
for
(
uint32_t
i
=
0
;
i
<
num_blocks
;
++
i
)
{
uint32_t
gws2
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
2
]
-
(
i
*
block_size
))
:
block_size
;
uint32_t
gws2
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
2
]
-
(
i
*
block_size
))
:
block_size
;
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NDRange
(
0
,
0
,
i
*
block_size
),
kernel
,
cl
::
NDRange
(
0
,
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws2
),
cl
::
NDRange
(
params
[
0
],
params
[
1
],
params
[
2
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
...
...
@@ -247,15 +246,16 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
if
(
LimitKernelTime
())
{
double
elapse_time
=
timer
->
AccumulatedMicros
();
timer
->
ClearTiming
();
uint32_t
num_blocks
=
std
::
min
(
static_cast
<
uint32_t
>
(
elapse_time
/
kMaxKernelExeTime
)
+
1
,
gws
[
2
]);
uint32_t
num_blocks
=
std
::
min
(
static_cast
<
uint32_t
>
(
elapse_time
/
kMaxKernelExeTime
)
+
1
,
gws
[
2
]);
(
*
tuning_result
)[
3
]
=
num_blocks
;
const
uint32_t
block_size
=
gws
[
2
]
/
num_blocks
;
if
(
gws
[
2
]
%
num_blocks
>
0
)
num_blocks
++
;
for
(
uint32_t
i
=
0
;
i
<
num_blocks
;
++
i
)
{
uint32_t
gws2
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
2
]
-
(
i
*
block_size
))
:
block_size
;
uint32_t
gws2
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
2
]
-
(
i
*
block_size
))
:
block_size
;
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NDRange
(
0
,
0
,
i
*
block_size
),
kernel
,
cl
::
NDRange
(
0
,
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws
[
1
],
gws2
),
cl
::
NDRange
(
params
[
0
],
params
[
1
],
params
[
2
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
...
...
@@ -300,34 +300,30 @@ void TuningOrRun2DKernel(cl::Kernel &kernel,
{
kwg_size
/
256
,
256
,
1
},
{
kwg_size
/
512
,
512
,
1
},
{
kwg_size
,
1
,
1
},
{
1
,
kwg_size
,
1
}
};
{
1
,
kwg_size
,
1
}};
};
cl
::
Event
event
;
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>
&
params
,
Timer
*
timer
,
auto
func
=
[
&
](
const
std
::
vector
<
uint32_t
>
&
params
,
Timer
*
timer
,
std
::
vector
<
uint32_t
>
*
tuning_result
)
->
cl_int
{
MACE_CHECK
(
params
.
size
()
==
3
)
<<
"Tuning parameters of 2D kernel must be 3d"
;
MACE_CHECK
(
params
.
size
()
==
3
)
<<
"Tuning parameters of 2D kernel must be 3d"
;
cl_int
error
=
CL_SUCCESS
;
if
(
timer
==
nullptr
)
{
uint32_t
num_blocks
=
params
[
2
];
const
uint32_t
block_size
=
gws
[
1
]
/
num_blocks
;
if
(
gws
[
1
]
%
num_blocks
>
0
)
num_blocks
++
;
for
(
uint32_t
i
=
0
;
i
<
num_blocks
;
++
i
)
{
uint32_t
gws1
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
1
]
-
(
i
*
block_size
))
:
block_size
;
uint32_t
gws1
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
1
]
-
(
i
*
block_size
))
:
block_size
;
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NDRange
(
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws1
),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
kernel
,
cl
::
NDRange
(
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws1
),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
}
}
else
{
timer
->
ClearTiming
();
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
gws
[
0
],
gws
[
1
]),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
timer
->
AccumulateTiming
();
...
...
@@ -336,16 +332,16 @@ void TuningOrRun2DKernel(cl::Kernel &kernel,
if
(
LimitKernelTime
())
{
double
elapse_time
=
timer
->
AccumulatedMicros
();
timer
->
ClearTiming
();
uint32_t
num_blocks
=
std
::
min
(
static_cast
<
uint32_t
>
(
elapse_time
/
kMaxKernelExeTime
)
+
1
,
gws
[
1
]);
uint32_t
num_blocks
=
std
::
min
(
static_cast
<
uint32_t
>
(
elapse_time
/
kMaxKernelExeTime
)
+
1
,
gws
[
1
]);
(
*
tuning_result
)[
2
]
=
num_blocks
;
const
uint32_t
block_size
=
gws
[
1
]
/
num_blocks
;
if
(
gws
[
1
]
%
num_blocks
>
0
)
num_blocks
++
;
for
(
uint32_t
i
=
0
;
i
<
num_blocks
;
++
i
)
{
uint32_t
gws1
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
1
]
-
(
i
*
block_size
))
:
block_size
;
uint32_t
gws1
=
(
i
==
num_blocks
-
1
)
?
(
gws
[
1
]
-
(
i
*
block_size
))
:
block_size
;
error
=
runtime
->
command_queue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NDRange
(
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws1
),
kernel
,
cl
::
NDRange
(
0
,
i
*
block_size
),
cl
::
NDRange
(
gws
[
0
],
gws1
),
cl
::
NDRange
(
params
[
0
],
params
[
1
]),
nullptr
,
&
event
);
MACE_CHECK
(
error
==
CL_SUCCESS
)
<<
"Error code: "
<<
error
;
timer
->
AccumulateTiming
();
...
...
@@ -355,11 +351,8 @@ void TuningOrRun2DKernel(cl::Kernel &kernel,
return
error
;
};
OpenCLProfilingTimer
timer
(
&
event
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
tuning_key
,
lws
,
params_generator
,
func
,
&
timer
);
Tuner
<
uint32_t
>::
Get
()
->
template
TuneOrRun
<
cl_int
>(
tuning_key
,
lws
,
params_generator
,
func
,
&
timer
);
if
(
future
!=
nullptr
)
{
future
->
wait_fn
=
[
runtime
,
event
](
CallStats
*
stats
)
{
event
.
wait
();
...
...
@@ -368,7 +361,6 @@ void TuningOrRun2DKernel(cl::Kernel &kernel,
}
};
}
}
}
// namespace kernels
...
...
mace/kernels/opencl/helper.h
浏览文件 @
6b9aafd4
...
...
@@ -5,16 +5,16 @@
#ifndef MACE_KERNELS_OPENCL_HELPER_H_
#define MACE_KERNELS_OPENCL_HELPER_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/types.h"
#include "mace/utils/utils.h"
#include "mace/core/future.h"
namespace
mace
{
namespace
kernels
{
const
float
kMaxKernelExeTime
=
1000.0
;
// microseconds
const
float
kMaxKernelExeTime
=
1000.0
;
// microseconds
enum
BufferType
{
CONV2D_FILTER
=
0
,
...
...
@@ -31,7 +31,7 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
const
BufferType
type
,
std
::
vector
<
size_t
>
&
image_shape
);
std
::
vector
<
index_t
>
CalWinogradShape
(
const
std
::
vector
<
index_t
>
&
shape
,
std
::
vector
<
index_t
>
CalWinogradShape
(
const
std
::
vector
<
index_t
>
&
shape
,
const
BufferType
type
);
std
::
string
DtToCLCMDDt
(
const
DataType
dt
);
...
...
@@ -48,7 +48,6 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
const
std
::
vector
<
uint32_t
>
&
lws
,
StatsFuture
*
future
);
void
TuningOrRun2DKernel
(
cl
::
Kernel
&
kernel
,
const
std
::
string
tuning_key
,
const
uint32_t
*
gws
,
...
...
@@ -72,12 +71,12 @@ inline bool LimitKernelTime() {
}
namespace
{
template
<
typename
T
>
template
<
typename
T
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
(
*
ss
)
<<
v
;
}
template
<
typename
T
,
typename
...
Args
>
template
<
typename
T
,
typename
...
Args
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
first
,
...
...
@@ -87,7 +86,7 @@ void AppendToStream(std::stringstream *ss,
}
}
// namespace
template
<
typename
...
Args
>
template
<
typename
...
Args
>
std
::
string
Concat
(
Args
...
args
)
{
std
::
stringstream
ss
;
AppendToStream
(
&
ss
,
"_"
,
args
...);
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
6b9aafd4
...
...
@@ -11,12 +11,10 @@ namespace mace {
namespace
kernels
{
template
<
typename
T
>
void
MatMulFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
StatsFuture
*
future
)
{
void
MatMulFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
A
,
const
Tensor
*
B
,
Tensor
*
C
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
std
::
vector
<
size_t
>
c_image_shape
;
CalImage2DShape
(
c_shape
,
BufferType
::
IN_OUT_HEIGHT
,
c_image_shape
);
...
...
@@ -41,8 +39,7 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
A
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
B
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
C
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
...
...
@@ -57,20 +54,14 @@ void MatMulFunctor<DeviceType::OPENCL, T>::operator()(
};
const
std
::
vector
<
uint32_t
>
lws
=
{
16
,
64
,
1
};
std
::
stringstream
ss
;
ss
<<
"matmul_opencl_kernel_"
<<
C
->
dim
(
0
)
<<
"_"
<<
C
->
dim
(
1
)
<<
"_"
<<
C
->
dim
(
2
)
<<
"_"
<<
C
->
dim
(
3
);
ss
<<
"matmul_opencl_kernel_"
<<
C
->
dim
(
0
)
<<
"_"
<<
C
->
dim
(
1
)
<<
"_"
<<
C
->
dim
(
2
)
<<
"_"
<<
C
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
};
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
MatMulFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -11,17 +11,15 @@
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
template
<
typename
T
>
void
PoolingFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_CHECK
(
dilations_
[
0
]
==
1
&&
dilations_
[
1
]
==
1
)
<<
"Pooling opencl kernel not support dilation yet"
;
<<
"Pooling opencl kernel not support dilation yet"
;
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)
};
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input
->
dim
(
3
),
input
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
...
...
@@ -77,24 +75,17 @@ void PoolingFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
channel_blocks
),
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
batch
*
out_height
),
};
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"pooling_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"pooling_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
struct
PoolingFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
PoolingFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
PoolingFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
PoolingFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,12 +2,12 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/resize_bilinear.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/kernels/resize_bilinear.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -29,14 +29,14 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
auto
runtime
=
OpenCLRuntime
::
Global
();
std
::
set
<
std
::
string
>
built_options
;
...
...
@@ -45,7 +45,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
kernel_
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
...
...
@@ -62,11 +63,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
static_cast
<
uint32_t
>
(
out_height
*
batch
)};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"resize_bilinear_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"resize_bilinear_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
...
...
mace/kernels/opencl/softmax_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -6,13 +6,13 @@
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
template
<
typename
T
>
void
SoftmaxFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
logits
,
Tensor
*
output
,
StatsFuture
*
future
)
{
...
...
@@ -45,17 +45,12 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
static_cast
<
uint32_t
>
(
height
*
batch
)};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"softmax_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
ss
<<
"softmax_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
struct
SoftmaxFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
SoftmaxFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
SoftmaxFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
SoftmaxFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/space_to_batch_opencl.cc
浏览文件 @
6b9aafd4
...
...
@@ -5,20 +5,21 @@
#ifndef MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_
#define MACE_KERNELS_OPENCL_SPACE_TO_BATCH_H_
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/space_to_batch.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
void
SpaceToBatchFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
void
SpaceToBatchFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
const
char
*
kernel_name
=
nullptr
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
output_image_shape
);
...
...
@@ -37,8 +38,10 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
kernel_name_ss
<<
"-D"
<<
kernel_name
<<
"="
<<
obfuscated_kernel_name
;
built_options
.
emplace
(
kernel_name_ss
.
str
());
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
kernel_
=
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
kernel_
=
runtime
->
BuildKernel
(
"space_to_batch"
,
kernel_name
,
built_options
);
uint32_t
idx
=
0
;
if
(
b2s_
)
{
...
...
@@ -59,15 +62,13 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
}
const
uint32_t
chan_blk
=
RoundUpDiv4
<
uint32_t
>
(
batch_tensor
->
dim
(
3
));
const
uint32_t
gws
[
3
]
=
{
chan_blk
,
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
0
)
*
batch_tensor
->
dim
(
1
))};
const
uint32_t
gws
[
3
]
=
{
chan_blk
,
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
batch_tensor
->
dim
(
0
)
*
batch_tensor
->
dim
(
1
))};
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
kernel_name
<<
"_"
<<
batch_tensor
->
dim
(
0
)
<<
"_"
<<
batch_tensor
->
dim
(
1
)
<<
"_"
<<
batch_tensor
->
dim
(
2
)
<<
"_"
ss
<<
kernel_name
<<
"_"
<<
batch_tensor
->
dim
(
0
)
<<
"_"
<<
batch_tensor
->
dim
(
1
)
<<
"_"
<<
batch_tensor
->
dim
(
2
)
<<
"_"
<<
batch_tensor
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
...
...
mace/kernels/opencl/winograd_transform.cc
浏览文件 @
6b9aafd4
...
...
@@ -11,21 +11,21 @@
namespace
mace
{
namespace
kernels
{
template
<
typename
T
>
void
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
template
<
typename
T
>
void
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
3
,
3
,
input_tensor
->
dim
(
3
),
1
};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
RoundType
::
FLOOR
,
output_shape
.
data
());
CalcOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
.
data
(),
strides_
.
data
(),
RoundType
::
FLOOR
,
output_shape
.
data
());
}
const
index_t
round_h
=
(
output_shape
[
1
]
+
1
)
/
2
;
...
...
@@ -38,14 +38,16 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_HEIGHT
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
uint32_t
idx
=
0
;
...
...
@@ -60,34 +62,39 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
paddings
[
1
]
/
2
));
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
3
)))};
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
out_width
),
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
3
)))};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"winograd_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
ss
<<
"winograd_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
<<
input_tensor
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
<
typename
T
>
void
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
const
Tensor
*
bias
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
template
<
typename
T
>
void
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input_tensor
,
const
Tensor
*
bias
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
=
{
batch_
,
height_
,
width_
,
input_tensor
->
dim
(
1
)};
std
::
vector
<
size_t
>
image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
image_shape
);
output_tensor
->
ResizeImage
(
output_shape
,
image_shape
);
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_inverse_transform_2x2"
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
"winograd_inverse_transform_2x2"
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-Dwinograd_inverse_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-Dwinograd_inverse_transform_2x2="
+
obfuscated_kernel_name
);
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
switch
(
activation_
)
{
case
NOOP
:
...
...
@@ -112,18 +119,21 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
}
auto
runtime
=
OpenCLRuntime
::
Global
();
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
kernel_
=
runtime
->
BuildKernel
(
"winograd_transform"
,
obfuscated_kernel_name
,
built_options
);
const
uint32_t
round_h
=
(
height_
+
1
)
/
2
;
const
uint32_t
round_w
=
(
width_
+
1
)
/
2
;
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input_tensor
->
opencl_image
())));
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input_tensor
->
opencl_image
())));
if
(
bias
!=
nullptr
)
{
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
bias
->
opencl_image
())));
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
bias
->
opencl_image
())));
}
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output_tensor
->
opencl_image
())));
kernel_
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output_tensor
->
opencl_image
())));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
output_shape
[
1
]));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
output_shape
[
2
]));
kernel_
.
setArg
(
idx
++
,
static_cast
<
uint32_t
>
(
round_h
*
round_w
));
...
...
@@ -131,28 +141,23 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
kernel_
.
setArg
(
idx
++
,
relux_max_limit_
);
}
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
input_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
1
)))};
const
uint32_t
gws
[
2
]
=
{
static_cast
<
uint32_t
>
(
input_tensor
->
dim
(
2
)),
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
input_tensor
->
dim
(
1
)))};
const
std
::
vector
<
uint32_t
>
lws
=
{
128
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"winograd_inverse_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
ss
<<
"winograd_inverse_transform_kernel_"
<<
input_tensor
->
dim
(
0
)
<<
"_"
<<
input_tensor
->
dim
(
1
)
<<
"_"
<<
input_tensor
->
dim
(
2
)
<<
"_"
<<
input_tensor
->
dim
(
3
);
TuningOrRun2DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
template
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
half
>;
template
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/pooling.h
浏览文件 @
6b9aafd4
...
...
@@ -7,9 +7,9 @@
#include <limits>
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/core/runtime/opencl/cl2_header.h"
namespace
mace
{
...
...
@@ -42,7 +42,7 @@ struct PoolingFunctorBase {
const
int
*
dilations_
;
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
PoolingFunctor
:
PoolingFunctorBase
{
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
...
...
@@ -50,29 +50,27 @@ struct PoolingFunctor : PoolingFunctorBase {
const
Padding
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
int
*
dilations
)
:
PoolingFunctorBase
(
pooling_type
,
kernels
,
strides
,
padding_type
,
paddings
,
dilations
)
{
}
:
PoolingFunctorBase
(
pooling_type
,
kernels
,
strides
,
padding_type
,
paddings
,
dilations
)
{
}
void
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
kernels_
[
0
],
kernels_
[
1
],
input_tensor
->
dim
(
3
),
input_tensor
->
dim
(
3
)
};
kernels_
[
0
],
kernels_
[
1
],
input_tensor
->
dim
(
3
),
input_tensor
->
dim
(
3
)};
std
::
vector
<
int
>
paddings
(
2
);
if
(
paddings_
.
empty
())
{
kernels
::
CalcNHWCPaddingAndOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
dilations_
,
strides_
,
padding_type_
,
output_shape
.
data
(),
paddings
.
data
());
}
else
{
paddings
=
paddings_
;
CalcOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
CalcOutputSize
(
input_tensor
->
shape
().
data
(),
filter_shape
.
data
(),
paddings_
.
data
(),
dilations_
,
strides_
,
RoundType
::
CEIL
,
output_shape
.
data
());
}
output_tensor
->
Resize
(
output_shape
);
...
...
@@ -110,7 +108,8 @@ struct PoolingFunctor : PoolingFunctorBase {
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
in_offset
=
b
*
in_image_size
*
input_channels
+
c
;
T
res
=
std
::
numeric_limits
<
T
>::
lowest
();
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
...
...
@@ -119,7 +118,8 @@ struct PoolingFunctor : PoolingFunctorBase {
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
(
inh
*
input_width
+
inw
)
*
input_channels
;
index_t
input_offset
=
in_offset
+
(
inh
*
input_width
+
inw
)
*
input_channels
;
res
=
std
::
max
(
res
,
input
[
input_offset
]);
}
}
...
...
@@ -135,7 +135,8 @@ struct PoolingFunctor : PoolingFunctorBase {
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
in_offset
=
b
*
in_image_size
*
input_channels
+
c
;
T
sum
=
0
;
int
block_size
=
0
;
...
...
@@ -145,7 +146,8 @@ struct PoolingFunctor : PoolingFunctorBase {
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
(
inh
*
input_width
+
inw
)
*
input_channels
;
index_t
input_offset
=
in_offset
+
(
inh
*
input_width
+
inw
)
*
input_channels
;
sum
+=
input
[
input_offset
];
block_size
+=
1
;
}
...
...
@@ -158,16 +160,13 @@ struct PoolingFunctor : PoolingFunctorBase {
}
}
}
};
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
);
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
);
template
<
typename
T
>
template
<
typename
T
>
struct
PoolingFunctor
<
DeviceType
::
OPENCL
,
T
>
:
PoolingFunctorBase
{
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
...
...
@@ -175,9 +174,9 @@ struct PoolingFunctor<DeviceType::OPENCL, T> : PoolingFunctorBase {
const
Padding
padding_type
,
const
std
::
vector
<
int
>
&
paddings
,
const
int
*
dilations
)
:
PoolingFunctorBase
(
pooling_type
,
kernels
,
strides
,
padding_type
,
paddings
,
dilations
)
{
}
:
PoolingFunctorBase
(
pooling_type
,
kernels
,
strides
,
padding_type
,
paddings
,
dilations
)
{
}
void
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
);
...
...
mace/kernels/reshape.h
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#define MACE_KERNELS_RESHAPE_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -25,7 +25,6 @@ struct ReshapeFunctor {
}
};
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
6b9aafd4
...
...
@@ -5,8 +5,8 @@
#define MACE_KERNELS_RESIZE_BILINEAR_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -74,9 +74,9 @@ void ResizeImage(const T *images,
const
T
*
batch_input_ptr
=
images
+
in_batch_num_values
*
b
;
T
*
batch_output_ptr
=
output
+
out_batch_num_values
*
b
;
const
T
*
y_lower_input_ptr
=
batch_input_ptr
+
ys
[
y
].
lower
*
in_width
*
channels
;
batch_input_ptr
+
ys
[
y
].
lower
*
in_width
*
channels
;
const
T
*
y_upper_input_ptr
=
batch_input_ptr
+
ys
[
y
].
upper
*
in_width
*
channels
;
batch_input_ptr
+
ys
[
y
].
upper
*
in_width
*
channels
;
T
*
y_output_ptr
=
batch_output_ptr
+
y
*
out_width
*
channels
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
...
...
@@ -95,7 +95,7 @@ void ResizeImage(const T *images,
const
T
bottom_right
=
bottom_right_ptr
[
c
];
output_ptr
[
c
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
bottom_right
,
xs_lerp
,
ys_lerp
);
}
}
}
...
...
@@ -107,10 +107,10 @@ struct ResizeBilinearFunctorBase {
ResizeBilinearFunctorBase
(
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
align_corners_
(
align_corners
)
{
MACE_CHECK
(
size
.
size
()
==
2
);
out_height_
=
size
[
0
];
out_width_
=
size
[
1
];
}
MACE_CHECK
(
size
.
size
()
==
2
);
out_height_
=
size
[
0
];
out_width_
=
size
[
1
];
}
protected:
bool
align_corners_
;
...
...
@@ -163,8 +163,9 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase {
}
};
template
<
typename
T
>
struct
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ResizeBilinearFunctorBase
{
template
<
typename
T
>
struct
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ResizeBilinearFunctorBase
{
ResizeBilinearFunctor
(
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
ResizeBilinearFunctorBase
(
size
,
align_corners
)
{}
...
...
mace/kernels/space_to_batch.h
浏览文件 @
6b9aafd4
...
...
@@ -6,9 +6,9 @@
#define MACE_KERNELS_CONV_2D_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
#include "mace/core/runtime/opencl/cl2_header.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -16,11 +16,10 @@ namespace kernels {
struct
SpaceToBatchFunctorBase
{
SpaceToBatchFunctorBase
(
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
,
bool
b2s
)
:
paddings_
(
paddings
.
begin
(),
paddings
.
end
()),
block_shape_
(
block_shape
.
begin
(),
block_shape
.
end
()),
b2s_
(
b2s
)
{}
bool
b2s
)
:
paddings_
(
paddings
.
begin
(),
paddings
.
end
()),
block_shape_
(
block_shape
.
begin
(),
block_shape
.
end
()),
b2s_
(
b2s
)
{}
std
::
vector
<
int
>
paddings_
;
std
::
vector
<
int
>
block_shape_
;
...
...
@@ -28,10 +27,11 @@ struct SpaceToBatchFunctorBase {
};
template
<
DeviceType
D
,
typename
T
>
struct
SpaceToBatchFunctor
:
SpaceToBatchFunctorBase
{
struct
SpaceToBatchFunctor
:
SpaceToBatchFunctorBase
{
SpaceToBatchFunctor
(
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
,
bool
b2s
)
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
){}
bool
b2s
)
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
)
{}
void
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
...
...
@@ -42,10 +42,11 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase{
};
template
<
typename
T
>
struct
SpaceToBatchFunctor
<
DeviceType
::
OPENCL
,
T
>
:
SpaceToBatchFunctorBase
{
struct
SpaceToBatchFunctor
<
DeviceType
::
OPENCL
,
T
>
:
SpaceToBatchFunctorBase
{
SpaceToBatchFunctor
(
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
,
bool
b2s
)
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
){}
bool
b2s
)
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
)
{}
void
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
...
...
@@ -53,7 +54,6 @@ struct SpaceToBatchFunctor<DeviceType::OPENCL, T>: SpaceToBatchFunctorBase{
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
};
}
// namespace kernels
...
...
mace/kernels/winograd_transform.h
浏览文件 @
6b9aafd4
...
...
@@ -6,10 +6,10 @@
#define MACE_KERNELS_WINOGRAD_TRANSFORM_H_
#include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/tensor.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/kernels/activation.h"
#include "mace/
core/runtime/opencl/cl2_header
.h"
#include "mace/
kernels/conv_pool_2d_util
.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -17,38 +17,36 @@ namespace kernels {
struct
WinogradTransformFunctorBase
{
WinogradTransformFunctorBase
(
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
)
:
strides_
({
1
,
1
}),
dilations_
({
1
,
1
}),
padding_type_
(
padding_type
),
paddings_
(
paddings
)
{}
:
strides_
({
1
,
1
}),
dilations_
({
1
,
1
}),
padding_type_
(
padding_type
),
paddings_
(
paddings
)
{}
const
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
std
::
vector
<
int
>
dilations_
;
// [dilation_h, dilation_w]
const
std
::
vector
<
int
>
strides_
;
// [stride_h, stride_w]
const
std
::
vector
<
int
>
dilations_
;
// [dilation_h, dilation_w]
Padding
padding_type_
;
std
::
vector
<
int
>
paddings_
;
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
WinogradTransformFunctor
:
WinogradTransformFunctorBase
{
WinogradTransformFunctor
(
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
)
:
WinogradTransformFunctorBase
(
padding_type
,
paddings
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_NOT_IMPLEMENTED
;
}
};
template
<
typename
T
>
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>
:
WinogradTransformFunctorBase
{
template
<
typename
T
>
struct
WinogradTransformFunctor
<
DeviceType
::
OPENCL
,
T
>
:
WinogradTransformFunctorBase
{
WinogradTransformFunctor
(
const
Padding
&
padding_type
,
const
std
::
vector
<
int
>
&
paddings
)
:
WinogradTransformFunctorBase
(
padding_type
,
paddings
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
);
cl
::
Kernel
kernel_
;
};
...
...
@@ -72,14 +70,15 @@ struct WinogradInverseTransformFunctorBase {
const
float
relux_max_limit_
;
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
WinogradInverseTransformFunctor
:
WinogradInverseTransformFunctorBase
{
WinogradInverseTransformFunctor
(
const
int
batch
,
const
int
height
,
const
int
width
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
WinogradInverseTransformFunctorBase
(
batch
,
height
,
width
,
activation
,
relux_max_limit
)
{}
:
WinogradInverseTransformFunctorBase
(
batch
,
height
,
width
,
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
bias
,
...
...
@@ -87,17 +86,18 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
StatsFuture
*
future
)
{
MACE_NOT_IMPLEMENTED
;
}
};
template
<
typename
T
>
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
T
>
:
WinogradInverseTransformFunctorBase
{
template
<
typename
T
>
struct
WinogradInverseTransformFunctor
<
DeviceType
::
OPENCL
,
T
>
:
WinogradInverseTransformFunctorBase
{
WinogradInverseTransformFunctor
(
const
int
batch
,
const
int
height
,
const
int
width
,
const
ActivationType
activation
,
const
float
relux_max_limit
)
:
WinogradInverseTransformFunctorBase
(
batch
,
height
,
width
,
activation
,
relux_max_limit
)
{}
:
WinogradInverseTransformFunctorBase
(
batch
,
height
,
width
,
activation
,
relux_max_limit
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
bias
,
...
...
mace/ops/activation.h
浏览文件 @
6b9aafd4
...
...
@@ -22,7 +22,8 @@ class ActivationOp : public Operator<D, T> {
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input_tensor
=
this
->
Input
(
0
);
const
Tensor
*
alpha_tensor
=
this
->
InputSize
()
>=
2
?
this
->
Input
(
1
)
:
nullptr
;
const
Tensor
*
alpha_tensor
=
this
->
InputSize
()
>=
2
?
this
->
Input
(
1
)
:
nullptr
;
Tensor
*
output_tensor
=
this
->
outputs_
[
0
];
output_tensor
->
ResizeLike
(
input_tensor
);
...
...
mace/ops/activation_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -214,9 +214,7 @@ void TestSimplePrelu() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
-
5
,
-
4
,
-
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
-
1
,
0
,
0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Alpha"
,
{
2
},
{
2.0
,
3.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Alpha"
,
{
2
},
{
2.0
,
3.0
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
...
...
@@ -250,7 +248,8 @@ void TestSimplePrelu() {
}
auto
expected
=
CreateTensor
<
float
>
(
{
2
,
2
,
2
,
2
},
{
-
14
,
7
,
-
12
,
6
,
-
10
,
-
15
,
-
8
,
-
12
,
-
6
,
3
,
-
4
,
2
,
-
2
,
-
3
,
0
,
0
});
{
2
,
2
,
2
,
2
},
{
-
14
,
7
,
-
12
,
6
,
-
10
,
-
15
,
-
8
,
-
12
,
-
6
,
3
,
-
4
,
2
,
-
2
,
-
3
,
0
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
...
...
mace/ops/addn.h
浏览文件 @
6b9aafd4
...
...
@@ -26,12 +26,10 @@ class AddNOp : public Operator<D, T> {
for
(
int
i
=
1
;
i
<
n
;
++
i
)
{
inputs
[
i
]
=
this
->
Input
(
i
);
MACE_CHECK
(
inputs
[
0
]
->
dim_size
()
==
inputs
[
i
]
->
dim_size
());
MACE_CHECK
(
inputs
[
0
]
->
size
()
==
inputs
[
i
]
->
size
())
<<
"Input 0: "
<<
MakeString
(
inputs
[
0
]
->
shape
())
<<
", size: "
<<
inputs
[
0
]
->
size
()
<<
". Input "
<<
i
<<
": "
<<
MakeString
(
inputs
[
i
]
->
shape
())
<<
", size: "
<<
inputs
[
i
]
->
size
();
MACE_CHECK
(
inputs
[
0
]
->
size
()
==
inputs
[
i
]
->
size
())
<<
"Input 0: "
<<
MakeString
(
inputs
[
0
]
->
shape
())
<<
", size: "
<<
inputs
[
0
]
->
size
()
<<
". Input "
<<
i
<<
": "
<<
MakeString
(
inputs
[
i
]
->
shape
())
<<
", size: "
<<
inputs
[
i
]
->
size
();
}
functor_
(
inputs
,
output_tensor
,
future
);
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -15,8 +15,7 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
OpsTestNet
net
;
// Add input data
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
net
.
AddRandomInput
<
D
,
float
>
(
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
net
.
AddRandomInput
<
D
,
float
>
(
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
}
if
(
D
==
DeviceType
::
OPENCL
)
{
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -76,7 +76,7 @@ static void BatchNorm(
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/batch_to_space.h
浏览文件 @
6b9aafd4
...
...
@@ -12,15 +12,14 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
BatchToSpaceNDOp
:
public
Operator
<
D
,
T
>
{
public:
BatchToSpaceNDOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"crops"
,
{
0
,
0
,
0
,
0
}),
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
}),
true
)
{}
functor_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"crops"
,
{
0
,
0
,
0
,
0
}),
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
}),
true
)
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
batch_tensor
=
this
->
Input
(
INPUT
);
...
...
@@ -28,7 +27,8 @@ class BatchToSpaceNDOp : public Operator<D, T> {
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
batch_tensor
,
space_tensor
,
output_shape
.
data
());
functor_
(
space_tensor
,
output_shape
,
const_cast
<
Tensor
*>
(
batch_tensor
),
future
);
functor_
(
space_tensor
,
output_shape
,
const_cast
<
Tensor
*>
(
batch_tensor
),
future
);
return
true
;
}
...
...
@@ -37,7 +37,8 @@ class BatchToSpaceNDOp : public Operator<D, T> {
Tensor
*
output
,
index_t
*
output_shape
)
{
auto
crops
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"crops"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
MACE_CHECK
(
block_shape
.
size
()
==
2
,
"Block's shape should be 1D"
);
MACE_CHECK
(
crops
.
size
()
==
4
,
"Crops' shape should be 2D"
);
...
...
@@ -45,13 +46,13 @@ class BatchToSpaceNDOp : public Operator<D, T> {
const
index_t
block_dims
=
block_shape
.
size
();
index_t
block_shape_product
=
1
;
for
(
uint32_t
block_dim
=
0
;
block_dim
<
block_dims
;
++
block_dim
)
{
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
const
index_t
block_shape_value
=
block_shape
[
block_dim
];
const
index_t
cropped_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
*
block_shape_value
-
crops
[
block_dim
*
2
]
-
crops
[
block_dim
*
2
+
1
];
MACE_CHECK
(
cropped_input_size
>=
0
,
"cropped size must be non-negative"
);
const
index_t
cropped_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
*
block_shape_value
-
crops
[
block_dim
*
2
]
-
crops
[
block_dim
*
2
+
1
];
MACE_CHECK
(
cropped_input_size
>=
0
,
"cropped size must be non-negative"
);
block_shape_product
*=
block_shape_value
;
output_shape
[
block_dim
+
1
]
=
cropped_input_size
;
}
...
...
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -41,7 +41,7 @@ static void BMBatchToSpace(
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
...
...
mace/ops/bias_add_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -53,7 +53,7 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
} \
...
...
mace/ops/buffer_to_image.h
浏览文件 @
6b9aafd4
...
...
@@ -11,16 +11,17 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
class
BufferToImageOp
:
public
Operator
<
D
,
T
>
{
class
BufferToImageOp
:
public
Operator
<
D
,
T
>
{
public:
BufferToImageOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
)
{}
:
Operator
<
D
,
T
>
(
op_def
,
ws
)
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input_tensor
=
this
->
Input
(
INPUT
);
kernels
::
BufferType
type
=
static_cast
<
kernels
::
BufferType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"buffer_type"
,
static_cast
<
int
>
(
kernels
::
CONV2D_FILTER
)));
kernels
::
BufferType
type
=
static_cast
<
kernels
::
BufferType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"buffer_type"
,
static_cast
<
int
>
(
kernels
::
CONV2D_FILTER
)));
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
functor_
(
const_cast
<
Tensor
*>
(
input_tensor
),
type
,
output
,
future
);
...
...
mace/ops/buffer_to_image_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -7,8 +7,9 @@
using
namespace
mace
;
template
<
DeviceType
D
,
typename
T
>
void
TestBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
)
{
template
<
DeviceType
D
,
typename
T
>
void
TestBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
)
{
OpsTestNet
net
;
OpDefBuilder
(
"BufferToImage"
,
"BufferToImageTest"
)
.
Input
(
"Input"
)
...
...
@@ -34,7 +35,8 @@ void TestBidirectionTransform(const int type, const std::vector<index_t> &input_
net
.
RunOp
(
D
);
// Check
ExpectTensorNear
<
T
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-5
);
ExpectTensorNear
<
T
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-5
);
}
TEST
(
BufferToImageTest
,
ArgSmall
)
{
...
...
@@ -54,51 +56,63 @@ TEST(BufferToImageTest, ArgLarge) {
}
TEST
(
BufferToImageTest
,
InputSmallSingleChannel
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
1
,
2
,
3
,
1
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
1
,
2
,
3
,
1
});
}
TEST
(
BufferToImageTest
,
InputSmallMultipleChannel
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
1
,
2
,
3
,
3
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
1
,
2
,
3
,
3
});
}
TEST
(
BufferToImageTest
,
InputSmallMultipleBatchAndChannel
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
2
,
3
,
3
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
2
,
3
,
3
});
}
TEST
(
BufferToImageTest
,
InputMedia
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
13
,
17
,
128
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
13
,
17
,
128
});
}
TEST
(
BufferToImageTest
,
InputLarge
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
64
,
64
,
256
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
IN_OUT_CHANNEL
,
{
3
,
64
,
64
,
256
});
}
TEST
(
BufferToImageTest
,
Filter1x1Small
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
3
,
5
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
3
,
5
});
}
TEST
(
BufferToImageTest
,
Filter1x1Media
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
13
,
17
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
13
,
17
});
}
TEST
(
BufferToImageTest
,
Filter1x1Large
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
128
,
512
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
1
,
1
,
128
,
512
});
}
TEST
(
BufferToImageTest
,
Filter3x3Small
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
3
,
5
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
3
,
5
});
}
TEST
(
BufferToImageTest
,
Filter3x3Meida
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
13
,
17
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
13
,
17
});
}
TEST
(
BufferToImageTest
,
Filter3x3Large
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
128
,
256
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
CONV2D_FILTER
,
{
3
,
3
,
128
,
256
});
}
template
<
DeviceType
D
,
typename
T
>
void
TestDiffTypeBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
)
{
template
<
DeviceType
D
,
typename
T
>
void
TestDiffTypeBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
)
{
OpsTestNet
net
;
OpDefBuilder
(
"BufferToImage"
,
"BufferToImageTest"
)
.
Input
(
"Input"
)
...
...
@@ -123,14 +137,16 @@ void TestDiffTypeBidirectionTransform(const int type, const std::vector<index_t>
net
.
RunOp
(
D
);
// Check
ExpectTensorNear
<
float
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-2
);
ExpectTensorNear
<
float
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-2
);
}
TEST
(
BufferToImageTest
,
ArgFloatToHalfSmall
)
{
TestDiffTypeBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
11
});
TestDiffTypeBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
11
});
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestStringHalfBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
,
const
unsigned
char
*
input_data
)
{
...
...
@@ -142,9 +158,10 @@ void TestStringHalfBidirectionTransform(const int type,
.
AddIntArg
(
"T"
,
DataTypeToEnum
<
T
>::
value
)
.
Finalize
(
net
.
NewOperatorDef
());
const
half
*
h_data
=
reinterpret_cast
<
const
half
*>
(
input_data
);
const
half
*
h_data
=
reinterpret_cast
<
const
half
*>
(
input_data
);
net
.
AddInputFromArray
<
D
,
half
>
(
"Input"
,
input_shape
,
std
::
vector
<
half
>
(
h_data
,
h_data
+
2
));
net
.
AddInputFromArray
<
D
,
half
>
(
"Input"
,
input_shape
,
std
::
vector
<
half
>
(
h_data
,
h_data
+
2
));
// Run
net
.
RunOp
(
D
);
...
...
@@ -160,12 +177,14 @@ void TestStringHalfBidirectionTransform(const int type,
net
.
RunOp
(
D
);
// Check
ExpectTensorNear
<
half
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-2
);
ExpectTensorNear
<
half
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-2
);
}
TEST
(
BufferToImageTest
,
ArgStringHalfToHalfSmall
)
{
const
unsigned
char
input_data
[]
=
{
0xCD
,
0x3C
,
0x33
,
0x40
,};
TestStringHalfBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
2
},
input_data
);
const
unsigned
char
input_data
[]
=
{
0xCD
,
0x3C
,
0x33
,
0x40
,
};
TestStringHalfBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
2
},
input_data
);
}
mace/ops/channel_shuffle.h
浏览文件 @
6b9aafd4
...
...
@@ -28,8 +28,8 @@ class ChannelShuffleOp : public Operator<D, T> {
input
->
shape
()[
1
]);
output
->
ResizeLike
(
input
);
functor_
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
output
->
mutable_data
<
T
>
(),
future
);
functor_
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
output
->
mutable_data
<
T
>
(),
future
);
return
true
;
}
...
...
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -41,7 +41,7 @@ static void ChannelShuffle(
static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(float))); \
ChannelShuffle<DEVICE>(iters, N, C, H, W, G); \
} \
...
...
mace/ops/concat.h
浏览文件 @
6b9aafd4
...
...
@@ -14,10 +14,11 @@ class ConcatOp : public Operator<D, T> {
public:
ConcatOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"axis"
,
3
)){}
functor_
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"axis"
,
3
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
MACE_CHECK
(
this
->
InputSize
()
>=
2
)
<<
"There must be at least two inputs to concat"
;
MACE_CHECK
(
this
->
InputSize
()
>=
2
)
<<
"There must be at least two inputs to concat"
;
const
std
::
vector
<
const
Tensor
*>
input_list
=
this
->
Inputs
();
const
int32_t
concat_axis
=
OperatorBase
::
GetSingleArgument
<
int
>
(
"axis"
,
3
);
const
int32_t
input_dims
=
input_list
[
0
]
->
dim_size
();
...
...
mace/ops/concat_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -37,11 +37,10 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
}
}
#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \
static void BM_CONCAT_CPU_##DIM0##_##DIM1( \
int iters) { \
#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \
static void BM_CONCAT_CPU_##DIM0##_##DIM1(int iters) { \
ConcatHelper<DeviceType::CPU, float>(iters, DIM0, DIM1); \
} \
}
\
BENCHMARK(BM_CONCAT_CPU_##DIM0##_##DIM1)
BM_CONCAT_CPU_MACRO
(
0
,
1000
);
...
...
@@ -90,13 +89,11 @@ static void OpenclConcatHelper(int iters,
}
}
#define BM_CONCAT_OPENCL_MACRO(N, H, W, C, TYPE) \
static void BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE( \
int iters) { \
std::vector<index_t> shape = {N, H, W, C}; \
OpenclConcatHelper<TYPE>(iters, shape, shape, 3); \
} \
#define BM_CONCAT_OPENCL_MACRO(N, H, W, C, TYPE) \
static void BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE(int iters) { \
std::vector<index_t> shape = {N, H, W, C}; \
OpenclConcatHelper<TYPE>(iters, shape, shape, 3); \
} \
BENCHMARK(BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE)
BM_CONCAT_OPENCL_MACRO
(
3
,
32
,
32
,
32
,
float
);
...
...
mace/ops/concat_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -112,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
concat_axis_size
+=
input_shapes
[
i
][
axis
];
GenerateRandomRealTypeData
(
input_shapes
[
i
],
inputs
[
i
]);
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
MakeString
(
"Input"
,
i
),
input_shapes
[
i
],
inputs
[
i
]);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
MakeString
(
"Input"
,
i
),
input_shapes
[
i
],
inputs
[
i
]);
}
// Run
...
...
@@ -214,6 +214,6 @@ TEST_F(ConcatOpTest, OPENCLUnAligned) {
}
TEST_F
(
ConcatOpTest
,
OPENCLAlignedMultiInput
)
{
OpenclRandomTest
<
float
>
(
{{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
}},
3
);
OpenclRandomTest
<
float
>
(
{{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
32
}},
3
);
}
\ No newline at end of file
mace/ops/conv_2d_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <fstream>
#include "mace/ops/conv_2d.h"
#include <fstream>
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
...
...
@@ -342,7 +342,8 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
stride
)
{
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
stride
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
...
...
@@ -412,27 +413,21 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape, const int s
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
16
,
16
,
32
},
1
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
16
,
16
,
32
},
2
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
16
,
16
,
32
},
1
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
16
,
16
,
32
},
2
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS12
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
17
,
113
,
5
,
7
},
1
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
17
,
113
,
5
,
7
},
2
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
17
,
113
,
5
,
7
},
1
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
17
,
113
,
5
,
7
},
2
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedConvNxNS34
)
{
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
31
,
113
,
13
,
17
},
3
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
13
,
17
},
4
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
31
,
113
,
13
,
17
},
3
);
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
13
,
17
},
4
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
index_t
>
&
filter_shape
,
const
std
::
vector
<
int
>
&
dilations
)
{
...
...
@@ -519,67 +514,58 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv1x1S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
1
,
1
,
32
,
64
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
1
,
1
,
32
,
64
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv3x3S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
3
,
3
,
32
,
64
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
3
,
3
,
32
,
64
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv15x1S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
15
,
1
,
256
,
2
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
15
,
1
,
256
,
2
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv1x15S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
1
,
15
,
256
,
2
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
1
,
15
,
256
,
2
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfAlignedConv7x75S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
7
,
7
,
3
,
64
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
7
,
7
,
3
,
64
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfUnalignedConv1x1S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
},
{
1
,
1
,
5
,
7
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
},
{
1
,
1
,
5
,
7
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfUnalignedConv3x3S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
},
{
3
,
3
,
5
,
7
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
107
,
113
},
{
3
,
3
,
5
,
7
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfConv5x5Dilation2
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
64
,
64
},
{
5
,
5
,
16
,
16
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
64
,
64
},
{
5
,
5
,
16
,
16
},
{
2
,
2
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfConv7x7Dilation2
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
64
,
64
},
{
7
,
7
,
16
,
16
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
64
,
64
},
{
7
,
7
,
16
,
16
},
{
2
,
2
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLHalfConv7x7Dilation4
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
63
,
67
},
{
7
,
7
,
16
,
16
},
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
63
,
67
},
{
7
,
7
,
16
,
16
},
{
4
,
4
});
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestDilationConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
dilation_rate
)
{
template
<
DeviceType
D
,
typename
T
>
static
void
TestDilationConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
dilation_rate
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
...
...
@@ -617,9 +603,12 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape, const int dil
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -634,7 +623,8 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape, const int dil
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -647,22 +637,20 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape, const int dil
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedDilation2
)
{
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
32
,
64
},
2
);
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
32
,
64
},
2
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLAligned2Dilation4
)
{
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
128
,
128
,
16
,
16
},
4
);
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
128
,
128
,
16
,
16
},
4
);
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedDilation4
)
{
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
4
);
TestDilationConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
4
);
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestArbitraryPadConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
int
>
&
paddings
)
{
template
<
DeviceType
D
,
typename
T
>
static
void
TestArbitraryPadConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
int
>
&
paddings
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
)
{
srand
(
time
(
NULL
));
...
...
@@ -698,9 +686,12 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, const std
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -714,7 +705,8 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, const std
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -726,8 +718,7 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, const std
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedPad1
)
{
TestArbitraryPadConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
32
,
64
},
{
1
,
1
});
TestArbitraryPadConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
,
32
,
64
},
{
1
,
1
});
}
TEST_F
(
Conv2dOpTest
,
OPENCLAlignedPad2
)
{
...
...
@@ -736,6 +727,5 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
}
TEST_F
(
Conv2dOpTest
,
OPENCLUnalignedPad4
)
{
TestArbitraryPadConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
{
4
,
4
});
TestArbitraryPadConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
{
4
,
4
});
}
mace/ops/eltwise.h
浏览文件 @
6b9aafd4
...
...
@@ -18,15 +18,17 @@ class EltwiseOp : public Operator<D, T> {
functor_
(
static_cast
<
kernels
::
EltwiseType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"type"
,
static_cast
<
int
>
(
kernels
::
EltwiseType
::
SUM
))),
OperatorBase
::
GetRepeatedArgument
<
float
>
(
"coeff"
)){}
OperatorBase
::
GetRepeatedArgument
<
float
>
(
"coeff"
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input0
=
this
->
Input
(
0
);
const
Tensor
*
input1
=
this
->
Input
(
1
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
<<
"Inputs of Eltwise op must be same shape"
;
for
(
int
i
=
0
;
i
<
input0
->
dim_size
();
++
i
)
{
MACE_CHECK
(
input0
->
dim
(
i
)
==
input1
->
dim
(
i
))
<<
"Inputs of Eltwise op must be same shape"
;
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
<<
"Inputs of Eltwise op must be same shape"
;
for
(
int
i
=
0
;
i
<
input0
->
dim_size
();
++
i
)
{
MACE_CHECK
(
input0
->
dim
(
i
)
==
input1
->
dim
(
i
))
<<
"Inputs of Eltwise op must be same shape"
;
}
output
->
ResizeLike
(
input0
);
...
...
mace/ops/eltwise_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -61,7 +61,7 @@ static void EltwiseBenchmark(
BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
EltwiseBenchmark<DEVICE, TYPE>( \
iters, static_cast<kernels::EltwiseType>(ELT_TYPE), N, H, W, C); \
...
...
mace/ops/eltwise_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -2,15 +2,15 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/eltwise.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/kernels/eltwise.h"
namespace
mace
{
class
EltwiseOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
Simple
(
const
kernels
::
EltwiseType
type
,
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
float
>
&
input0
,
...
...
@@ -36,8 +36,10 @@ void Simple(const kernels::EltwiseType type,
// Run
net
.
RunOp
(
D
);
}
else
{
BufferToImage
<
D
,
half
>
(
net
,
"Input1"
,
"InputImg1"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
net
,
"Input2"
,
"InputImg2"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
net
,
"Input1"
,
"InputImg1"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
net
,
"Input2"
,
"InputImg2"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"Eltwise"
,
"EltwiseTest"
)
.
Input
(
"InputImg1"
)
.
Input
(
"InputImg2"
)
...
...
@@ -49,7 +51,8 @@ void Simple(const kernels::EltwiseType type,
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImg"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImg"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
}
auto
expected
=
CreateTensor
<
float
>
(
shape
,
output
);
...
...
@@ -58,64 +61,42 @@ void Simple(const kernels::EltwiseType type,
}
TEST_F
(
EltwiseOpTest
,
CPUSimple
)
{
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
4
,
9
,
16
,
25
,
36
});
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
4
,
6
,
8
,
10
,
12
});
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
3
,
6
,
9
,
12
,
15
,
18
},
{
2
,
1
});
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
MAX
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
3
,
6
,
9
,
12
,
15
,
18
},
{
2
,
1
});
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
MAX
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
{
1
,
2
,
3
,
4
,
6
,
6
});
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
Simple
<
DeviceType
::
CPU
>
(
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
{
1
,
1
,
3
,
3
,
5
,
6
});
}
TEST_F
(
EltwiseOpTest
,
GPUSimple
)
{
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
PROD
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
4
,
9
,
16
,
25
,
36
});
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
4
,
6
,
8
,
10
,
12
});
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
3
,
6
,
9
,
12
,
15
,
18
},
{
2
,
1
});
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
MAX
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
SUM
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
3
,
6
,
9
,
12
,
15
,
18
},
{
2
,
1
});
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
MAX
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
{
1
,
2
,
3
,
4
,
6
,
6
});
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
Simple
<
DeviceType
::
OPENCL
>
(
kernels
::
EltwiseType
::
MIN
,
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
1
,
3
,
3
,
6
,
6
},
{
1
,
1
,
3
,
3
,
5
,
6
});
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
RandomTest
(
const
kernels
::
EltwiseType
type
,
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
...
...
@@ -139,8 +120,10 @@ void RandomTest(const kernels::EltwiseType type,
// Run
net
.
RunOp
();
BufferToImage
<
D
,
T
>
(
net
,
"Input1"
,
"InputImg1"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Input2"
,
"InputImg2"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Input1"
,
"InputImg1"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Input2"
,
"InputImg2"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"Eltwise"
,
"EltwiseTest"
)
.
Input
(
"InputImg1"
)
.
Input
(
"InputImg2"
)
...
...
@@ -153,12 +136,15 @@ void RandomTest(const kernels::EltwiseType type,
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImg"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImg"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_FLOAT
)
{
ExpectTensorNear
<
float
>
(
*
net
.
GetTensor
(
"Output"
),
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-3
);
ExpectTensorNear
<
float
>
(
*
net
.
GetTensor
(
"Output"
),
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-3
);
}
else
{
ExpectTensorNear
<
float
>
(
*
net
.
GetTensor
(
"Output"
),
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-1
);
ExpectTensorNear
<
float
>
(
*
net
.
GetTensor
(
"Output"
),
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-1
);
}
}
...
...
mace/ops/folded_batch_norm.cc
浏览文件 @
6b9aafd4
...
...
@@ -7,25 +7,22 @@
namespace
mace
{
void
Register_FoldedBatchNorm
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FoldedBatchNormOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FoldedBatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FoldedBatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FoldedBatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
...
...
mace/ops/folded_batch_norm_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -17,7 +17,7 @@ void CalculateScaleOffset(const std::vector<float> &gamma,
std
::
vector
<
float
>
&
scale
,
std
::
vector
<
float
>
&
offset
)
{
size_t
size
=
gamma
.
size
();
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
scale
[
i
]
=
gamma
[
i
]
/
std
::
sqrt
(
var
[
i
]
+
epsilon
);
offset
[
i
]
=
offset
[
i
]
-
mean
[
i
]
*
scale
[
i
];
}
...
...
mace/ops/fully_connected.h
浏览文件 @
6b9aafd4
...
...
@@ -15,11 +15,10 @@ class FullyConnectedOp : public Operator<D, T> {
public:
FullyConnectedOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
kernels
::
StringToActivationType
(
OperatorBase
::
GetSingleArgument
<
std
::
string
>
(
"activation"
,
"NOOP"
)),
OperatorBase
::
GetSingleArgument
<
float
>
(
"max_limit"
,
0.0
f
))
{}
functor_
(
kernels
::
StringToActivationType
(
OperatorBase
::
GetSingleArgument
<
std
::
string
>
(
"activation"
,
"NOOP"
)),
OperatorBase
::
GetSingleArgument
<
float
>
(
"max_limit"
,
0.0
f
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
mace/ops/fully_connected_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -17,16 +17,17 @@ static void FCBenchmark(
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channel
});
net
.
AddRandomInput
<
D
,
float
>
(
"Weight"
,
{
out_channel
,
height
*
width
*
channel
});
net
.
AddRandomInput
<
D
,
float
>
(
"Weight"
,
{
out_channel
,
height
*
width
*
channel
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
out_channel
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Weight"
,
"WeightImage"
,
kernels
::
BufferType
::
WEIGHT_HEIGHT
);
kernels
::
BufferType
::
WEIGHT_HEIGHT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FC"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -57,14 +58,17 @@ static void FCBenchmark(
net
.
Sync
();
}
#define BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \
static void BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE(int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H * W * OC + OC; \
const int64_t tot = static_cast<int64_t>(iters) * (N + OC) * C * H * W + OC; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
FCBenchmark<DEVICE, TYPE>(iters, N, H, W, C, OC); \
} \
#define BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \
static void BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t macc = \
static_cast<int64_t>(iters) * N * C * H * W * OC + OC; \
const int64_t tot = \
static_cast<int64_t>(iters) * (N + OC) * C * H * W + OC; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
FCBenchmark<DEVICE, TYPE>(iters, N, H, W, C, OC); \
} \
BENCHMARK(BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE)
#define BM_FC(N, H, W, C, OC) \
...
...
mace/ops/fully_connected_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -10,7 +10,7 @@ namespace mace {
class
FullyConnectedOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
Simple
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_value
,
const
std
::
vector
<
index_t
>
&
weight_shape
,
...
...
@@ -58,83 +58,52 @@ void Simple(const std::vector<index_t> &input_shape,
}
// Check
auto
expected
=
CreateTensor
<
float
>
(
output_shape
,
output_value
);
auto
expected
=
CreateTensor
<
float
>
(
output_shape
,
output_value
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
FullyConnectedOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
({
1
,
2
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
8
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
},
{
2
},
{
1
,
1
,
1
,
1
},
{
206
});
Simple
<
DeviceType
::
CPU
>
({
1
,
1
,
2
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
2
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
10
,
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
,
100
},
{
2
},
{
2
,
3
},
{
1
,
1
,
1
,
2
},
{
387
,
3853
});
Simple
<
DeviceType
::
CPU
>
({
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
},
{
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
92
,
912
,
94
,
914
,
96
});
Simple
<
DeviceType
::
CPU
>
({
1
,
2
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
8
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
},
{
2
},
{
1
,
1
,
1
,
1
},
{
206
});
Simple
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
2
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
10
,
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
,
100
},
{
2
},
{
2
,
3
},
{
1
,
1
,
1
,
2
},
{
387
,
3853
});
Simple
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
},
{
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
92
,
912
,
94
,
914
,
96
});
}
TEST_F
(
FullyConnectedOpTest
,
SimpleCPUWithBatch
)
{
Simple
<
DeviceType
::
CPU
>
({
2
,
1
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
4
},
{
1
,
2
,
3
,
4
},
{
1
},
{
2
},
{
2
,
1
,
1
,
1
},
{
32
,
72
});
Simple
<
DeviceType
::
CPU
>
({
2
,
1
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
4
},
{
1
,
2
,
3
,
4
},
{
1
},
{
2
},
{
2
,
1
,
1
,
1
},
{
32
,
72
});
}
TEST_F
(
FullyConnectedOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
({
1
,
2
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
8
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
},
{
2
},
{
1
,
1
,
1
,
1
},
{
206
});
Simple
<
DeviceType
::
OPENCL
>
({
1
,
1
,
2
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
2
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
10
,
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
,
100
},
{
2
},
{
2
,
3
},
{
1
,
1
,
1
,
2
},
{
387
,
3853
});
Simple
<
DeviceType
::
OPENCL
>
({
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
},
{
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
92
,
912
,
94
,
914
,
96
});
Simple
<
DeviceType
::
OPENCL
>
({
1
,
2
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
8
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
},
{
2
},
{
1
,
1
,
1
,
1
},
{
206
});
Simple
<
DeviceType
::
OPENCL
>
(
{
1
,
1
,
2
,
5
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
},
{
2
,
10
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
10
,
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
,
100
},
{
2
},
{
2
,
3
},
{
1
,
1
,
1
,
2
},
{
387
,
3853
});
Simple
<
DeviceType
::
OPENCL
>
(
{
1
,
1
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
5
,
6
},
{
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
,
10
,
20
,
30
,
40
,
50
,
60
,
1
,
2
,
3
,
4
,
5
,
6
},
{
5
},
{
1
,
2
,
3
,
4
,
5
},
{
1
,
1
,
1
,
5
},
{
92
,
912
,
94
,
914
,
96
});
}
TEST_F
(
FullyConnectedOpTest
,
SimpleGPUWithBatch
)
{
Simple
<
DeviceType
::
OPENCL
>
({
2
,
1
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
4
},
{
1
,
2
,
3
,
4
},
{
1
},
{
2
},
{
2
,
1
,
1
,
1
},
{
32
,
72
});
Simple
<
DeviceType
::
OPENCL
>
({
2
,
1
,
2
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
},
{
1
,
4
},
{
1
,
2
,
3
,
4
},
{
1
},
{
2
},
{
2
,
1
,
1
,
1
},
{
32
,
72
});
}
template
<
typename
T
>
template
<
typename
T
>
void
Complex
(
const
index_t
batch
,
const
index_t
height
,
const
index_t
width
,
...
...
@@ -156,8 +125,7 @@ void Complex(const index_t batch,
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
out_channel
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
out_channel
});
// run cpu
net
.
RunOp
();
...
...
@@ -215,6 +183,4 @@ TEST_F(FullyConnectedOpTest, OPENCLHalfUnAlignedWithBatch) {
Complex
<
half
>
(
16
,
13
,
12
,
31
,
113
);
Complex
<
half
>
(
31
,
21
,
11
,
23
,
103
);
}
}
mace/ops/fused_conv_2d_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -511,8 +511,9 @@ TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) {
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
40
,
40
},
{
15
,
1
,
32
,
64
});
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestAtrousConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
dilation
)
{
template
<
DeviceType
D
,
typename
T
>
static
void
TestAtrousConvNxN
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
dilation
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
...
...
@@ -550,9 +551,12 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape, const int dilat
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -567,7 +571,8 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape, const int dilat
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -591,7 +596,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedAtrousConvNxN) {
TestAtrousConvNxN
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
},
2
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestGeneralHalfAtrousConv
(
const
std
::
vector
<
index_t
>
&
image_shape
,
const
std
::
vector
<
index_t
>
&
filter_shape
,
const
std
::
vector
<
int
>
&
dilations
)
{
...
...
@@ -620,7 +625,8 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
output_channels
,
input_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
...
...
@@ -632,9 +638,12 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -649,7 +658,8 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.7
);
};
...
...
@@ -658,13 +668,11 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL7X7AtrousConvD2
)
{
TestGeneralHalfAtrousConv
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
7
,
7
,
3
,
16
},
TestGeneralHalfAtrousConv
<
DeviceType
::
OPENCL
>
({
32
,
32
},
{
7
,
7
,
3
,
16
},
{
2
,
2
});
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL15X15AtrousConvD4
)
{
TestGeneralHalfAtrousConv
<
DeviceType
::
OPENCL
>
({
63
,
71
},
{
15
,
15
,
16
,
16
},
TestGeneralHalfAtrousConv
<
DeviceType
::
OPENCL
>
({
63
,
71
},
{
15
,
15
,
16
,
16
},
{
2
,
2
});
}
mace/ops/global_avg_pooling.h
浏览文件 @
6b9aafd4
//DMACE_ENABLE_NEON
//
DMACE_ENABLE_NEON
// Copyright (c) 2017 XiaoMi All rights reserved.
//
...
...
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -40,13 +40,13 @@ static void GlobalAvgPooling(
static void BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE)
#define BM_GLOBAL_AVG_POOLING(N, C, H, W)
\
#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \
BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, CPU);
// BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, NEON);
...
...
mace/ops/image_to_buffer.h
浏览文件 @
6b9aafd4
...
...
@@ -11,17 +11,18 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
class
ImageToBufferOp
:
public
Operator
<
D
,
T
>
{
class
ImageToBufferOp
:
public
Operator
<
D
,
T
>
{
public:
ImageToBufferOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
true
)
{}
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
true
)
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input_tensor
=
this
->
Input
(
INPUT
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
kernels
::
BufferType
type
=
static_cast
<
kernels
::
BufferType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"buffer_type"
,
static_cast
<
int
>
(
kernels
::
CONV2D_FILTER
)));
kernels
::
BufferType
type
=
static_cast
<
kernels
::
BufferType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"buffer_type"
,
static_cast
<
int
>
(
kernels
::
CONV2D_FILTER
)));
functor_
(
output
,
type
,
const_cast
<
Tensor
*>
(
input_tensor
),
future
);
return
true
;
}
...
...
mace/ops/matmul.h
浏览文件 @
6b9aafd4
...
...
@@ -24,8 +24,8 @@ class MatMulOp : public Operator<D, T> {
<<
"The dimension of A and B should be 4"
;
MACE_CHECK
(
A
->
dim
(
0
)
==
B
->
dim
(
0
))
<<
"A and B must have same batch size"
;
MACE_CHECK
(
A
->
dim
(
2
)
==
B
->
dim
(
1
))
<<
"the number of A's column "
<<
A
->
dim
(
2
)
<<
" must be equal to B's row "
<<
B
->
dim
(
1
);
<<
"the number of A's column "
<<
A
->
dim
(
2
)
<<
" must be equal to B's row "
<<
B
->
dim
(
1
);
functor_
(
A
,
B
,
C
,
future
);
return
true
;
...
...
mace/ops/matmul_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -10,7 +10,7 @@ namespace mace {
class
MatMulOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
Simple
(
const
std
::
vector
<
index_t
>
&
A_shape
,
const
std
::
vector
<
float
>
&
A_value
,
const
std
::
vector
<
index_t
>
&
B_shape
,
...
...
@@ -51,29 +51,24 @@ void Simple(const std::vector<index_t> &A_shape,
}
// Check
auto
expected
=
CreateTensor
<
float
>
(
C_shape
,
C_value
);
auto
expected
=
CreateTensor
<
float
>
(
C_shape
,
C_value
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
MatMulOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
Simple
<
DeviceType
::
CPU
>
({
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
215
,
230
,
245
,
260
,
275
,
490
,
530
,
570
,
610
,
650
,
765
,
830
,
895
,
960
,
1025
,
1040
,
1130
,
1220
,
1310
,
1400
,
1315
,
1430
,
1545
,
1660
,
1775
});
Simple
<
DeviceType
::
CPU
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
Simple
<
DeviceType
::
CPU
>
(
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
215
,
230
,
245
,
260
,
275
,
490
,
530
,
570
,
610
,
650
,
765
,
830
,
895
,
960
,
1025
,
1040
,
1130
,
1220
,
1310
,
1400
,
1315
,
1430
,
1545
,
1660
,
1775
});
}
TEST_F
(
MatMulOpTest
,
SimpleCPUWithBatch
)
{
Simple
<
DeviceType
::
CPU
>
({
2
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
{
2
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
1
,
2
,
3
,
4
,
5
,
6
},
...
...
@@ -81,19 +76,17 @@ TEST_F(MatMulOpTest, SimpleCPUWithBatch) {
}
TEST_F
(
MatMulOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
Simple
<
DeviceType
::
OPENCL
>
({
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
215
,
230
,
245
,
260
,
275
,
490
,
530
,
570
,
610
,
650
,
765
,
830
,
895
,
960
,
1025
,
1040
,
1130
,
1220
,
1310
,
1400
,
1315
,
1430
,
1545
,
1660
,
1775
});
Simple
<
DeviceType
::
OPENCL
>
({
1
,
2
,
3
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
3
,
2
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
},
{
1
,
2
,
2
,
1
},
{
22
,
28
,
49
,
64
});
Simple
<
DeviceType
::
OPENCL
>
(
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
},
{
1
,
5
,
5
,
1
},
{
215
,
230
,
245
,
260
,
275
,
490
,
530
,
570
,
610
,
650
,
765
,
830
,
895
,
960
,
1025
,
1040
,
1130
,
1220
,
1310
,
1400
,
1315
,
1430
,
1545
,
1660
,
1775
});
}
TEST_F
(
MatMulOpTest
,
SimpleGPUWithBatch
)
{
...
...
@@ -118,8 +111,8 @@ void Complex(const index_t batch,
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"A"
,
{
batch
,
height
,
channels
,
1
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"A"
,
{
batch
,
height
,
channels
,
1
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"B"
,
{
batch
,
channels
,
out_width
,
1
});
...
...
@@ -132,9 +125,9 @@ void Complex(const index_t batch,
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"A"
,
"AImage"
,
kernels
::
BufferType
::
IN_OUT_WIDTH
);
kernels
::
BufferType
::
IN_OUT_WIDTH
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"B"
,
"BImage"
,
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
kernels
::
BufferType
::
IN_OUT_HEIGHT
);
OpDefBuilder
(
"MatMul"
,
"MatMulTest"
)
.
Input
(
"AImage"
)
...
...
@@ -177,5 +170,4 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) {
Complex
<
half
>
(
16
,
32
,
64
,
64
);
Complex
<
half
>
(
31
,
31
,
61
,
67
);
}
}
mace/ops/ops_test_util.h
浏览文件 @
6b9aafd4
...
...
@@ -95,7 +95,7 @@ class OpDefBuilder {
class
OpsTestNet
{
public:
OpsTestNet
()
:
op_registry_
(
new
OperatorRegistry
())
{};
OpsTestNet
()
:
op_registry_
(
new
OperatorRegistry
()){};
template
<
DeviceType
D
,
typename
T
>
void
AddInputFromArray
(
const
std
::
string
&
name
,
...
...
@@ -239,7 +239,7 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
template
<
typename
T
>
std
::
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
const
std
::
vector
<
T
>
&
data
)
{
std
::
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
CPU
),
DataTypeToEnum
<
T
>::
v
()));
res
->
Resize
(
shape
);
...
...
@@ -334,9 +334,8 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
for
(
int
h
=
0
;
h
<
x
.
dim
(
1
);
++
h
)
{
for
(
int
w
=
0
;
w
<
x
.
dim
(
2
);
++
w
)
{
for
(
int
c
=
0
;
c
<
x
.
dim
(
3
);
++
c
)
{
EXPECT_NEAR
(
*
a
,
*
b
,
abs_err
)
<<
"with index = ["
<<
n
<<
", "
<<
h
<<
", "
<<
w
<<
", "
<<
c
<<
"]"
;
EXPECT_NEAR
(
*
a
,
*
b
,
abs_err
)
<<
"with index = ["
<<
n
<<
", "
<<
h
<<
", "
<<
w
<<
", "
<<
c
<<
"]"
;
a
++
;
b
++
;
}
...
...
mace/ops/pooling.h
浏览文件 @
6b9aafd4
...
...
@@ -20,8 +20,12 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
pooling_type_
(
static_cast
<
PoolingType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"pooling_type"
,
static_cast
<
int
>
(
AVG
)))),
functor_
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_type_
,
this
->
paddings_
,
this
->
dilations_
.
data
()){};
functor_
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_type_
,
this
->
paddings_
,
this
->
dilations_
.
data
()){};
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
mace/ops/pooling_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -54,7 +54,7 @@ static void Pooling(int iters,
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
...
...
@@ -62,7 +62,7 @@ static void Pooling(int iters,
BENCHMARK( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
#define BM_POOLING(N, C, H, W, K, S, PA, PO)
\
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU);
// BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, NEON);
...
...
mace/ops/pooling_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -198,7 +198,8 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -333,7 +334,8 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
mace/ops/reshape.h
浏览文件 @
6b9aafd4
...
...
@@ -15,7 +15,7 @@ class ReshapeOp : public Operator<D, T> {
public:
ReshapeOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
shape_
(
OperatorBase
::
GetRepeatedArgument
<
int64_t
>
(
"shape"
)){}
shape_
(
OperatorBase
::
GetRepeatedArgument
<
int64_t
>
(
"shape"
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
@@ -38,9 +38,11 @@ class ReshapeOp : public Operator<D, T> {
}
if
(
unknown_idx
!=
-
1
)
{
MACE_CHECK
(
product
!=
0
)
<<
"Cannot infer shape if there is zero shape size."
;
MACE_CHECK
(
product
!=
0
)
<<
"Cannot infer shape if there is zero shape size."
;
const
index_t
missing
=
input
->
size
()
/
product
;
MACE_CHECK
(
missing
*
product
==
input
->
size
())
<<
"Input size not match reshaped tensor size"
;
MACE_CHECK
(
missing
*
product
==
input
->
size
())
<<
"Input size not match reshaped tensor size"
;
out_shape
[
unknown_idx
]
=
missing
;
}
...
...
mace/ops/reshape_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -13,7 +13,6 @@ class ReshapeTest : public OpsTestBase {};
void
TestReshape
(
const
std
::
vector
<
index_t
>
&
org_shape
,
const
std
::
vector
<
int
>
&
output_shape
,
const
std
::
vector
<
index_t
>
&
res_shape
)
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Reshape"
,
"ReshapeTest"
)
...
...
mace/ops/softmax.cc
浏览文件 @
6b9aafd4
...
...
@@ -20,9 +20,9 @@ void Register_Softmax(OperatorRegistry *op_registry) {
SoftmaxOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Softmax"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SoftmaxOp
<
DeviceType
::
OPENCL
,
half
>
);
}
...
...
mace/ops/softmax.h
浏览文件 @
6b9aafd4
...
...
@@ -14,11 +14,10 @@ template <DeviceType D, class T>
class
SoftmaxOp
:
public
Operator
<
D
,
T
>
{
public:
SoftmaxOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{
}
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
logits
=
this
->
Input
(
LOGITS
);
const
Tensor
*
logits
=
this
->
Input
(
LOGITS
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
output
->
ResizeLike
(
logits
);
...
...
mace/ops/softmax_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -14,7 +14,8 @@ void Simple() {
// Construct graph
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
2
,
4
},
{
1
,
1
,
1
,
1
,
1
,
2
,
3
,
4
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
2
,
4
},
{
1
,
1
,
1
,
1
,
1
,
2
,
3
,
4
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
...
...
@@ -41,18 +42,15 @@ void Simple() {
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
4
},
{
0.25
,
0.25
,
0.25
,
0.25
,
0.0320586
,
0.08714432
,
0.23688282
,
0.64391426
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
1
,
2
,
4
},
{
0.25
,
0.25
,
0.25
,
0.25
,
0.0320586
,
0.08714432
,
0.23688282
,
0.64391426
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-7
);
}
TEST_F
(
SoftmaxOpTest
,
CPUSimple
)
{
Simple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
SoftmaxOpTest
,
OPENCLSimple
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
SoftmaxOpTest
,
CPUSimple
)
{
Simple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
SoftmaxOpTest
,
OPENCLSimple
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
>
void
Complex
(
const
std
::
vector
<
index_t
>
&
logits_shape
)
{
...
...
mace/ops/space_to_batch.h
浏览文件 @
6b9aafd4
...
...
@@ -12,7 +12,7 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
SpaceToBatchNDOp
:
public
Operator
<
D
,
T
>
{
public:
SpaceToBatchNDOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
...
...
@@ -28,17 +28,19 @@ class SpaceToBatchNDOp : public Operator<D, T> {
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
space_tensor
,
batch_tensor
,
output_shape
.
data
());
functor_
(
const_cast
<
Tensor
*>
(
space_tensor
),
output_shape
,
batch_tensor
,
future
);
functor_
(
const_cast
<
Tensor
*>
(
space_tensor
),
output_shape
,
batch_tensor
,
future
);
return
true
;
}
private:
inline
void
CalculateOutputShape
(
const
Tensor
*
input_tensor
,
Tensor
*
output
,
index_t
*
output_shape
)
{
auto
paddings
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"paddings"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
auto
paddings
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"paddings"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
MACE_CHECK
(
block_shape
.
size
()
==
2
,
"Block's shape should be 1D"
);
MACE_CHECK
(
paddings
.
size
()
==
4
,
"Paddings' shape should be 2D"
);
...
...
@@ -46,13 +48,14 @@ class SpaceToBatchNDOp : public Operator<D, T> {
const
index_t
block_dims
=
block_shape
.
size
();
index_t
block_shape_product
=
1
;
for
(
uint32_t
block_dim
=
0
;
block_dim
<
block_dims
;
++
block_dim
)
{
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
const
index_t
block_shape_value
=
block_shape
[
block_dim
];
const
index_t
padded_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
+
paddings
[
block_dim
*
2
]
+
paddings
[
block_dim
*
2
+
1
];
MACE_CHECK
(
padded_input_size
%
block_shape_value
==
0
,
"padded input "
,
padded_input_size
,
" is not divisible by block_shape"
);
const
index_t
padded_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
+
paddings
[
block_dim
*
2
]
+
paddings
[
block_dim
*
2
+
1
];
MACE_CHECK
(
padded_input_size
%
block_shape_value
==
0
,
"padded input "
,
padded_input_size
,
" is not divisible by block_shape"
);
block_shape_product
*=
block_shape_value
;
output_shape
[
block_dim
+
1
]
=
padded_input_size
/
block_shape_value
;
}
...
...
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -42,7 +42,7 @@ static void BMSpaceToBatch(
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
...
...
mace/ops/winograd_convolution_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -4,8 +4,8 @@
#include <fstream>
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
...
...
@@ -21,7 +21,9 @@ void TransposeFilter(const std::vector<float> &input,
for
(
index_t
w
=
0
;
w
<
input_shape
[
1
];
++
w
)
{
for
(
index_t
oc
=
0
;
oc
<
input_shape
[
2
];
++
oc
)
{
for
(
index_t
ic
=
0
;
ic
<
input_shape
[
3
];
++
ic
)
{
int
offset
=
((
oc
*
input_shape
[
3
]
+
ic
)
*
input_shape
[
0
]
+
h
)
*
input_shape
[
1
]
+
w
;
int
offset
=
((
oc
*
input_shape
[
3
]
+
ic
)
*
input_shape
[
0
]
+
h
)
*
input_shape
[
1
]
+
w
;
output
[
offset
]
=
*
input_ptr
;
++
input_ptr
;
}
...
...
@@ -30,7 +32,7 @@ void TransposeFilter(const std::vector<float> &input,
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
WinogradConvolution
(
const
index_t
batch
,
const
index_t
height
,
const
index_t
width
,
...
...
@@ -53,8 +55,7 @@ void WinogradConvolution(const index_t batch,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -78,8 +79,10 @@ void WinogradConvolution(const index_t batch,
// transform filter
std
::
vector
<
float
>
wino_filter_data
;
TransposeFilter
(
filter_data
,
filter_shape
,
wino_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"WinoFilterData"
,
{
out_channels
,
in_channels
,
3
,
3
},
wino_filter_data
);
BufferToImage
<
D
,
T
>
(
net
,
"WinoFilterData"
,
"WinoFilter"
,
kernels
::
BufferType
::
WINOGRAD_FILTER
);
net
.
AddInputFromArray
<
D
,
float
>
(
"WinoFilterData"
,
{
out_channels
,
in_channels
,
3
,
3
},
wino_filter_data
);
BufferToImage
<
D
,
T
>
(
net
,
"WinoFilterData"
,
"WinoFilter"
,
kernels
::
BufferType
::
WINOGRAD_FILTER
);
// transform input
OpDefBuilder
(
"WinogradTransform"
,
"WinogradTransformTest"
)
...
...
@@ -126,18 +129,23 @@ void WinogradConvolution(const index_t batch,
}
TEST_F
(
WinogradConvlutionTest
,
AlignedConvolution
)
{
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
32
,
32
,
32
,
16
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
32
,
32
,
32
,
16
,
Padding
::
SAME
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
32
,
32
,
32
,
16
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
32
,
32
,
32
,
16
,
Padding
::
SAME
);
}
TEST_F
(
WinogradConvlutionTest
,
UnAlignedConvolution
)
{
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
61
,
67
,
31
,
37
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
61
,
67
,
37
,
31
,
Padding
::
SAME
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
61
,
67
,
31
,
37
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
1
,
61
,
67
,
37
,
31
,
Padding
::
SAME
);
}
TEST_F
(
WinogradConvlutionTest
,
BatchConvolution
)
{
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
3
,
64
,
64
,
32
,
32
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
5
,
61
,
67
,
37
,
31
,
Padding
::
SAME
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
3
,
64
,
64
,
32
,
32
,
Padding
::
VALID
);
WinogradConvolution
<
DeviceType
::
OPENCL
,
float
>
(
5
,
61
,
67
,
37
,
31
,
Padding
::
SAME
);
}
}
mace/ops/winograd_inverse_transform.h
浏览文件 @
6b9aafd4
...
...
@@ -8,12 +8,12 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/kernels/winograd_transform.h"
#include "mace/kernels/activation.h"
#include "mace/kernels/winograd_transform.h"
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
WinogradInverseTransformOp
:
public
Operator
<
D
,
T
>
{
public:
WinogradInverseTransformOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
...
...
mace/ops/winograd_transform.h
浏览文件 @
6b9aafd4
...
...
@@ -12,14 +12,14 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
WinogradTransformOp
:
public
Operator
<
D
,
T
>
{
public:
WinogradTransformOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
static_cast
<
Padding
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"padding"
,
static_cast
<
int
>
(
VALID
))),
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"padding_values"
))
{}
"padding"
,
static_cast
<
int
>
(
VALID
))),
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"padding_values"
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input_tensor
=
this
->
Input
(
INPUT
);
...
...
mace/ops/winograd_transform_benchmark.cc
浏览文件 @
6b9aafd4
...
...
@@ -16,7 +16,7 @@ static void BMWinogradTransform(
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"WinogradTransform"
,
"WinogradTransformTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -36,17 +36,15 @@ static void BMWinogradTransform(
net
.
Sync
();
}
#define BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \
static void \
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
BENCHMARK( \
BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \
static void BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradTransform<DEVICE, TYPE>(iters, N, H, W, C); \
} \
BENCHMARK(BM_WINOGRAD_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_WINOGRAD_TRANSFORM(N, H, W, C) \
BM_WINOGRAD_TRANSFORM_MACRO(N, H, W, C, half, OPENCL);
...
...
@@ -88,16 +86,16 @@ static void BMWinogradInverseTransform(
net
.
Sync
();
}
#define BM_WINOGRAD_INVERSE_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE)
\
static void \
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE(
\
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::MaccProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C);
\
} \
BENCHMARK( \
#define BM_WINOGRAD_INVERSE_TRANSFORM_MACRO(N, H, W, C, TYPE, DEVICE) \
static void
\
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE( \
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
mace::testing::MaccProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
BMWinogradInverseTransform<DEVICE, TYPE>(iters, N, H, W, C); \
}
\
BENCHMARK(
\
BM_WINOGRAD_INVERSE_TRANSFORM_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#define BM_WINOGRAD_INVERSE_TRANSFORM(N, H, W, C) \
...
...
mace/public/mace.h
浏览文件 @
6b9aafd4
...
...
@@ -6,10 +6,10 @@
#define MACE_CORE_MACE_H_
#include <cstdint>
#include <vector>
#include <string>
#include <memory>
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace
mace
{
...
...
@@ -25,13 +25,11 @@ namespace mace {
#define MACE_STR(x) MACE_STR_HELPER(x)
// e.g. "0.5.0" or "0.6.0-alpha".
#define MACE_VERSION_STRING \
#define MACE_VERSION_STRING
\
(MACE_STR(MACE_MAJOR_VERSION) "." MACE_STR(MACE_MINOR_VERSION) "." MACE_STR( \
MACE_PATCH_VERSION) MACE_VERSION_SUFFIX)
inline
const
char
*
MaceVersion
()
{
return
MACE_VERSION_STRING
;
}
inline
const
char
*
MaceVersion
()
{
return
MACE_VERSION_STRING
;
}
extern
const
char
*
MaceGitVersion
();
...
...
@@ -43,17 +41,9 @@ extern const char *MaceGitVersion();
classname &operator=(const classname &) = delete
#endif
enum
NetMode
{
INIT
=
0
,
NORMAL
=
1
};
enum
NetMode
{
INIT
=
0
,
NORMAL
=
1
};
enum
DeviceType
{
CPU
=
0
,
NEON
=
1
,
OPENCL
=
2
,
HEXAGON
=
3
};
enum
DeviceType
{
CPU
=
0
,
NEON
=
1
,
OPENCL
=
2
,
HEXAGON
=
3
};
enum
DataType
{
DT_INVALID
=
0
,
...
...
@@ -104,6 +94,7 @@ class Argument {
public:
Argument
();
void
CopyFrom
(
const
Argument
&
from
);
public:
const
std
::
string
&
name
()
const
;
void
set_name
(
const
std
::
string
&
value
);
...
...
@@ -147,11 +138,13 @@ class NodeInput {
NodeInput
()
{}
NodeInput
(
int
node_id
,
int
output_port
);
void
CopyFrom
(
const
NodeInput
&
from
);
public:
int
node_id
()
const
;
void
set_node_id
(
int
node_id
);
int
output_port
()
const
;
void
set_output_port
(
int
output_port
);
private:
int
node_id_
;
int
output_port_
;
...
...
@@ -162,8 +155,10 @@ class OutputShape {
OutputShape
();
OutputShape
(
const
std
::
vector
<
int64_t
>
&
dims
);
void
CopyFrom
(
const
OutputShape
&
from
);
public:
const
std
::
vector
<
int64_t
>
&
dims
()
const
;
private:
std
::
vector
<
int64_t
>
dims_
;
};
...
...
@@ -240,10 +235,12 @@ class OperatorDef {
class
MemoryBlock
{
public:
MemoryBlock
(
int
mem_id
,
uint32_t
x
,
uint32_t
y
);
public:
int
mem_id
()
const
;
uint32_t
x
()
const
;
uint32_t
y
()
const
;
private:
int
mem_id_
;
uint32_t
x_
;
...
...
@@ -255,9 +252,9 @@ class MemoryArena {
const
std
::
vector
<
MemoryBlock
>
&
mem_block
()
const
;
std
::
vector
<
MemoryBlock
>
&
mutable_mem_block
();
int
mem_block_size
()
const
;
private:
std
::
vector
<
MemoryBlock
>
mem_block_
;
};
// for hexagon mace-nnlib
...
...
@@ -268,10 +265,11 @@ class InputInfo {
int32_t
max_byte_size
()
const
;
DataType
data_type
()
const
;
const
std
::
vector
<
int32_t
>
&
dims
()
const
;
private:
std
::
string
name_
;
int32_t
node_id_
;
int32_t
max_byte_size_
;
// only support 32-bit len
int32_t
max_byte_size_
;
// only support 32-bit len
DataType
data_type_
;
std
::
vector
<
int32_t
>
dims_
;
};
...
...
@@ -285,10 +283,11 @@ class OutputInfo {
void
set_data_type
(
DataType
data_type
);
const
std
::
vector
<
int32_t
>
&
dims
()
const
;
void
set_dims
(
const
std
::
vector
<
int32_t
>
&
dims
);
private:
std
::
string
name_
;
int32_t
node_id_
;
int32_t
max_byte_size_
;
// only support 32-bit len
int32_t
max_byte_size_
;
// only support 32-bit len
DataType
data_type_
;
std
::
vector
<
int32_t
>
dims_
;
};
...
...
@@ -299,6 +298,7 @@ class NetDef {
int
op_size
()
const
;
const
OperatorDef
&
op
(
const
int
idx
)
const
;
public:
const
std
::
string
&
name
()
const
;
bool
has_name
()
const
;
...
...
@@ -359,7 +359,6 @@ struct RunMetadata {
std
::
vector
<
OperatorStats
>
op_stats
;
};
class
Workspace
;
class
NetBase
;
class
OperatorRegistry
;
...
...
@@ -374,8 +373,7 @@ struct MaceInputInfo {
class
MaceEngine
{
public:
// Single input and output
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
// Multiple input or output
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
,
...
...
@@ -394,7 +392,7 @@ class MaceEngine {
// Multiple input or output
bool
Run
(
const
std
::
vector
<
MaceInputInfo
>
&
input
,
std
::
map
<
std
::
string
,
float
*>
&
output
,
RunMetadata
*
run_metadata
=
nullptr
);
RunMetadata
*
run_metadata
=
nullptr
);
MaceEngine
(
const
MaceEngine
&
)
=
delete
;
MaceEngine
&
operator
=
(
const
MaceEngine
&
)
=
delete
;
...
...
mace/utils/command_line_flags.h
浏览文件 @
6b9aafd4
...
...
@@ -45,7 +45,7 @@ class Flags {
// Return a usage message with command line cmdline, and the
// usage_text strings in flag_list[].
static
std
::
string
Usage
(
const
std
::
string
&
cmdline
,
const
std
::
vector
<
Flag
>
&
flag_list
);
const
std
::
vector
<
Flag
>
&
flag_list
);
};
}
// namespace mace
...
...
mace/utils/env_time.h
浏览文件 @
6b9aafd4
...
...
@@ -9,7 +9,6 @@
#include <sys/time.h>
#include <time.h>
namespace
mace
{
inline
int64_t
NowMicros
()
{
...
...
mace/utils/logging.h
浏览文件 @
6b9aafd4
...
...
@@ -10,8 +10,8 @@
#include <string>
#include <vector>
#include "mace/utils/env_time.h"
#include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/string_util.h"
#undef ERROR
...
...
mace/utils/string_util.h
浏览文件 @
6b9aafd4
...
...
@@ -27,7 +27,7 @@ inline void MakeStringInternal(std::stringstream &ss,
MakeStringInternal
(
ss
,
args
...);
}
}
// namespace
}
// namespace
template
<
typename
...
Args
>
std
::
string
MakeString
(
const
Args
&
...
args
)
{
...
...
mace/utils/timer.h
浏览文件 @
6b9aafd4
...
...
@@ -24,13 +24,9 @@ class WallClockTimer : public Timer {
public:
WallClockTimer
()
:
accumulated_micros_
(
0
)
{}
void
StartTiming
()
override
{
start_micros_
=
NowMicros
();
}
void
StartTiming
()
override
{
start_micros_
=
NowMicros
();
}
void
StopTiming
()
override
{
stop_micros_
=
NowMicros
();
}
void
StopTiming
()
override
{
stop_micros_
=
NowMicros
();
}
void
AccumulateTiming
()
override
{
StopTiming
();
...
...
@@ -43,13 +39,9 @@ class WallClockTimer : public Timer {
accumulated_micros_
=
0
;
}
double
ElapsedMicros
()
override
{
return
stop_micros_
-
start_micros_
;
}
double
ElapsedMicros
()
override
{
return
stop_micros_
-
start_micros_
;
}
double
AccumulatedMicros
()
override
{
return
accumulated_micros_
;
}
double
AccumulatedMicros
()
override
{
return
accumulated_micros_
;
}
private:
double
start_micros_
;
...
...
mace/utils/tuner_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -30,20 +30,14 @@ TEST_F(TunerTest, SimpleRun) {
WallClockTimer
timer
;
std
::
vector
<
unsigned
int
>
default_params
(
1
,
1
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
default_params
[
0
]
=
2
;
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
+
1
,
res
);
}
...
...
@@ -64,20 +58,13 @@ TEST_F(TunerTest, SimpleTune) {
};
// tune
WallClockTimer
timer
;
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
*
params_generator
,
TunerFunc
,
&
timer
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
*
params_generator
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
// run
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
}
...
...
mace/utils/utils.h
浏览文件 @
6b9aafd4
...
...
@@ -62,9 +62,9 @@ inline std::string ObfuscateSymbol(const std::string &src) {
if
(
dest
.
empty
())
{
return
dest
;
}
dest
[
0
]
=
src
[
0
];
// avoid invalid symbol which starts from 0-9
dest
[
0
]
=
src
[
0
];
// avoid invalid symbol which starts from 0-9
const
std
::
string
encode_dict
=
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"
;
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"
;
for
(
size_t
i
=
1
;
i
<
src
.
size
();
i
++
)
{
char
ch
=
src
[
i
];
int
idx
;
...
...
mace/utils/utils_test.cc
浏览文件 @
6b9aafd4
...
...
@@ -30,20 +30,14 @@ TEST_F(TunerTest, SimpleRun) {
WallClockTimer
timer
;
std
::
vector
<
unsigned
int
>
default_params
(
1
,
1
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
default_params
[
0
]
=
2
;
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
+
1
,
res
);
}
...
...
@@ -64,20 +58,13 @@ TEST_F(TunerTest, SimpleTune) {
};
// tune
WallClockTimer
timer
;
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
*
params_generator
,
TunerFunc
,
&
timer
);
int
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
*
params_generator
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
// run
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
res
=
Tuner
<
unsigned
int
>::
Get
()
->
template
TuneOrRun
<
unsigned
int
>(
"SimpleRun"
,
default_params
,
nullptr
,
TunerFunc
,
&
timer
);
EXPECT_EQ
(
expect
,
res
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录