Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6376fe6f
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
6376fe6f
编写于
5月 09, 2018
作者:
W
wuchenghui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix warnings
上级
22676b19
变更
72
显示空白变更内容
内联
并排
Showing
72 changed file
with
238 addition
and
484 deletion
+238
-484
WORKSPACE
WORKSPACE
+12
-11
mace/benchmark/statistics.cc
mace/benchmark/statistics.cc
+3
-3
mace/benchmark/statistics.h
mace/benchmark/statistics.h
+6
-6
mace/core/allocator.h
mace/core/allocator.h
+10
-1
mace/core/buffer.h
mace/core/buffer.h
+28
-4
mace/core/net.cc
mace/core/net.cc
+5
-1
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+7
-37
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+26
-23
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+6
-7
mace/kernels/activation.h
mace/kernels/activation.h
+1
-0
mace/kernels/addn.h
mace/kernels/addn.h
+1
-0
mace/kernels/arm/conv_2d_neon_3x3.cc
mace/kernels/arm/conv_2d_neon_3x3.cc
+9
-4
mace/kernels/arm/conv_2d_neon_5x5.cc
mace/kernels/arm/conv_2d_neon_5x5.cc
+3
-1
mace/kernels/arm/conv_2d_neon_7x7.cc
mace/kernels/arm/conv_2d_neon_7x7.cc
+9
-3
mace/kernels/arm/depthwise_conv2d_neon_3x3.cc
mace/kernels/arm/depthwise_conv2d_neon_3x3.cc
+9
-0
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+1
-0
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+1
-0
mace/kernels/buffer_to_image.h
mace/kernels/buffer_to_image.h
+4
-0
mace/kernels/channel_shuffle.h
mace/kernels/channel_shuffle.h
+2
-1
mace/kernels/concat.h
mace/kernels/concat.h
+3
-2
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+5
-4
mace/kernels/depth_to_space.h
mace/kernels/depth_to_space.h
+1
-0
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+1
-0
mace/kernels/eltwise.h
mace/kernels/eltwise.h
+1
-0
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+1
-0
mace/kernels/gemm.cc
mace/kernels/gemm.cc
+3
-2
mace/kernels/local_response_norm.h
mace/kernels/local_response_norm.h
+1
-0
mace/kernels/matmul.h
mace/kernels/matmul.h
+1
-8
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+1
-1
mace/kernels/opencl/concat.cc
mace/kernels/opencl/concat.cc
+0
-1
mace/kernels/opencl/conv_2d_1x1.cc
mace/kernels/opencl/conv_2d_1x1.cc
+2
-0
mace/kernels/opencl/depthwise_conv.cc
mace/kernels/opencl/depthwise_conv.cc
+0
-1
mace/kernels/opencl/eltwise.cc
mace/kernels/opencl/eltwise.cc
+1
-0
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+0
-11
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+2
-2
mace/kernels/opencl/matmul.cc
mace/kernels/opencl/matmul.cc
+1
-0
mace/kernels/opencl/out_of_range_check_test.cc
mace/kernels/opencl/out_of_range_check_test.cc
+4
-4
mace/kernels/opencl/pad.cc
mace/kernels/opencl/pad.cc
+2
-1
mace/kernels/opencl/slice.cc
mace/kernels/opencl/slice.cc
+1
-1
mace/kernels/pad.h
mace/kernels/pad.h
+3
-1
mace/kernels/pooling.h
mace/kernels/pooling.h
+1
-0
mace/kernels/proposal.h
mace/kernels/proposal.h
+3
-2
mace/kernels/psroi_align.h
mace/kernels/psroi_align.h
+1
-0
mace/kernels/quantize.h
mace/kernels/quantize.h
+3
-0
mace/kernels/reshape.h
mace/kernels/reshape.h
+1
-0
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+1
-0
mace/kernels/slice.h
mace/kernels/slice.h
+1
-0
mace/kernels/softmax.h
mace/kernels/softmax.h
+1
-0
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+4
-0
mace/kernels/transpose.h
mace/kernels/transpose.h
+1
-0
mace/kernels/winograd_transform.h
mace/kernels/winograd_transform.h
+6
-0
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+2
-0
mace/ops/batch_to_space.h
mace/ops/batch_to_space.h
+1
-2
mace/ops/channel_shuffle.h
mace/ops/channel_shuffle.h
+0
-1
mace/ops/folded_batch_norm_test.cc
mace/ops/folded_batch_norm_test.cc
+1
-1
mace/ops/proposal_test.cc
mace/ops/proposal_test.cc
+1
-1
mace/ops/quantize_test.cc
mace/ops/quantize_test.cc
+0
-2
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+2
-0
mace/ops/space_to_batch.h
mace/ops/space_to_batch.h
+1
-2
mace/ops/transpose.h
mace/ops/transpose.h
+3
-3
mace/ops/winograd_inverse_transform.cc
mace/ops/winograd_inverse_transform.cc
+2
-0
mace/ops/winograd_transform.cc
mace/ops/winograd_transform.cc
+2
-0
mace/tools/validation/BUILD
mace/tools/validation/BUILD
+5
-3
mace/tools/validation/mace_run.cc
mace/tools/validation/mace_run.cc
+2
-3
mace/utils/BUILD
mace/utils/BUILD
+1
-21
mace/utils/command_line_flags.cc
mace/utils/command_line_flags.cc
+0
-232
mace/utils/command_line_flags.h
mace/utils/command_line_flags.h
+0
-63
mace/utils/string_util.h
mace/utils/string_util.h
+1
-1
mace/utils/tuner.h
mace/utils/tuner.h
+2
-2
mace/utils/tuner_production.cc
mace/utils/tuner_production.cc
+1
-0
mace/utils/tuner_test.cc
mace/utils/tuner_test.cc
+1
-1
tools/sh_commands.py
tools/sh_commands.py
+10
-3
未找到文件。
WORKSPACE
浏览文件 @
6376fe6f
...
...
@@ -5,11 +5,11 @@ workspace(name = "mace")
# This statement defines the @com_google_protobuf repo.
http_archive
(
name
=
"com_google_protobuf"
,
sha256
=
"
40d39d97a7b514b3e34daef732f822eca0081960b269863f5b573db5548cb237
"
,
strip_prefix
=
"protobuf-3.4.0
rc3
"
,
sha256
=
"
542703acadc3f690d998f4641e1b988f15ba57ebca05fdfb1cd9095bec007948
"
,
strip_prefix
=
"protobuf-3.4.0"
,
urls
=
[
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/protobuf/protobuf-3.4.0
rc3
.zip"
,
"https://github.com/google/protobuf/archive/v3.4.0
rc3
.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/protobuf/protobuf-3.4.0.zip"
,
"https://github.com/google/protobuf/archive/v3.4.0.zip"
],
)
...
...
@@ -38,21 +38,22 @@ new_http_archive(
new_http_archive
(
name
=
"opencl_clhpp"
,
build_file
=
"mace/third_party/opencl-clhpp/opencl-clhpp.BUILD"
,
sha256
=
"d
4eb63372ad31f7efcae626852f75f7929ff28d1cabb5f50ef11035963a69b46
"
,
strip_prefix
=
"OpenCL-CLHPP-
2.0.10
"
,
sha256
=
"d
ab6f1834ec6e3843438cc0f97d63817902aadd04566418c1fcc7fb78987d4e7
"
,
strip_prefix
=
"OpenCL-CLHPP-
4c6f7d56271727e37fb19a9b47649dd175df2b12
"
,
urls
=
[
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/OpenCL-CLHPP/OpenCL-CLHPP-
2.0.10
.zip"
,
"https://github.com/KhronosGroup/OpenCL-CLHPP/archive/
v2.0.10
.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/OpenCL-CLHPP/OpenCL-CLHPP-
4c6f7d56271727e37fb19a9b47649dd175df2b12
.zip"
,
"https://github.com/KhronosGroup/OpenCL-CLHPP/archive/
4c6f7d56271727e37fb19a9b47649dd175df2b12
.zip"
],
)
new_http_archive
(
name
=
"half"
,
build_file
=
"mace/third_party/half/half.BUILD"
,
sha256
=
"cdd70d3bf3fe091b688e7ab3f48471c881a197d2c186c95cca8bf156961fb41c"
,
sha256
=
"0f514a1e877932b21dc5edc26a148ddc700b6af2facfed4c030ca72f74d0219e"
,
strip_prefix
=
"half-code-356-trunk"
,
urls
=
[
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-
1.12.0
.zip"
,
"https://
jaist.dl.sourceforge.net/project/half/half/1.12.0/half-1.12.0
.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-
code-356-trunk
.zip"
,
"https://
sourceforge.net/code-snapshots/svn/h/ha/half/code/half-code-356-trunk
.zip"
],
)
...
...
mace/benchmark/statistics.cc
浏览文件 @
6376fe6f
...
...
@@ -58,9 +58,9 @@ std::string ShapeToString(const std::vector<OutputShape> &output_shape) {
std
::
stringstream
stream
;
stream
<<
"["
;
for
(
in
t
i
=
0
;
i
<
output_shape
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
output_shape
.
size
();
++
i
)
{
const
std
::
vector
<
index_t
>
&
dims
=
output_shape
[
i
].
dims
();
for
(
in
t
j
=
0
;
j
<
dims
.
size
();
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
dims
.
size
();
++
j
)
{
stream
<<
dims
[
j
];
if
(
j
!=
dims
.
size
()
-
1
)
{
stream
<<
","
;
...
...
@@ -83,7 +83,7 @@ std::string VectorToString(const std::vector<T> &vec) {
std
::
stringstream
stream
;
stream
<<
"["
;
for
(
in
t
i
=
0
;
i
<
vec
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
vec
.
size
();
++
i
)
{
stream
<<
vec
[
i
];
if
(
i
!=
vec
.
size
()
-
1
)
{
stream
<<
","
;
...
...
mace/benchmark/statistics.h
浏览文件 @
6376fe6f
...
...
@@ -54,24 +54,24 @@ class TimeInfo {
sum_
(
0
),
square_sum
(
0
)
{}
const
int64_t
round
()
const
{
int64_t
round
()
const
{
return
round_
;
}
const
T
first
()
const
{
T
first
()
const
{
return
first_
;
}
const
T
sum
()
const
{
T
sum
()
const
{
return
sum_
;
}
const
double
avg
()
const
{
double
avg
()
const
{
return
round_
==
0
?
std
::
numeric_limits
<
double
>::
quiet_NaN
()
:
sum_
*
1.0
f
/
round_
;
}
const
double
std_deviation
()
const
{
double
std_deviation
()
const
{
if
(
round_
==
0
||
min_
==
max_
)
{
return
0
;
}
...
...
@@ -111,12 +111,12 @@ class TimeInfo {
}
private:
int64_t
round_
;
T
first_
;
T
curr_
;
T
min_
;
T
max_
;
T
sum_
;
int64_t
round_
;
double
square_sum
;
};
...
...
mace/core/allocator.h
浏览文件 @
6376fe6f
...
...
@@ -21,6 +21,7 @@
#include <vector>
#include <cstring>
#include "mace/core/macros.h"
#include "mace/core/registry.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
...
...
@@ -83,6 +84,8 @@ class CPUAllocator : public Allocator {
void
*
NewImage
(
const
std
::
vector
<
size_t
>
&
shape
,
const
DataType
dt
)
const
override
{
MACE_UNUSED
(
shape
);
MACE_UNUSED
(
dt
);
LOG
(
FATAL
)
<<
"Allocate CPU image"
;
return
nullptr
;
}
...
...
@@ -96,14 +99,20 @@ class CPUAllocator : public Allocator {
free
(
data
);
};
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
override
{
MACE_UNUSED
(
nbytes
);
return
reinterpret_cast
<
char
*>
(
buffer
)
+
offset
;
}
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
override
{
MACE_UNUSED
(
image_shape
);
MACE_UNUSED
(
mapped_image_pitch
);
return
buffer
;
}
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
override
{}
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
override
{
MACE_UNUSED
(
buffer
);
MACE_UNUSED
(
mapper_ptr
);
}
bool
OnHost
()
const
override
{
return
true
;
}
};
...
...
mace/core/buffer.h
浏览文件 @
6376fe6f
...
...
@@ -21,6 +21,7 @@
#include <functional>
#include "mace/core/allocator.h"
#include "mace/core/macros.h"
#include "mace/core/types.h"
namespace
mace
{
...
...
@@ -133,6 +134,7 @@ class Buffer : public BufferBase {
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_CHECK_NOTNULL
(
buf_
);
MACE_UNUSED
(
pitch
);
return
allocator_
->
Map
(
buf_
,
offset
,
length
);
}
...
...
@@ -232,6 +234,9 @@ class Image : public BufferBase {
std
::
vector
<
size_t
>
image_shape
()
const
{
return
shape_
;
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
MACE_UNUSED
(
pitch
);
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
...
...
@@ -254,9 +259,17 @@ class Image : public BufferBase {
mapped_buf_
=
nullptr
;
}
void
Resize
(
index_t
size
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Resize
(
index_t
size
)
{
MACE_UNUSED
(
size
);
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_UNUSED
(
src
);
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
MACE_NOT_IMPLEMENTED
;
}
bool
OnHost
()
const
{
return
allocator_
->
OnHost
();
}
...
...
@@ -327,11 +340,17 @@ class BufferSlice : public BufferBase {
}
void
*
Map
(
index_t
offset
,
index_t
length
,
std
::
vector
<
size_t
>
*
pitch
)
const
{
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
MACE_UNUSED
(
pitch
);
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_NOT_IMPLEMENTED
;
}
void
UnMap
(
void
*
mapped_ptr
)
const
{
MACE_UNUSED
(
mapped_ptr
);
MACE_NOT_IMPLEMENTED
;
}
void
Map
(
std
::
vector
<
size_t
>
*
pitch
)
{
MACE_CHECK_NOTNULL
(
buffer_
);
...
...
@@ -350,7 +369,12 @@ class BufferSlice : public BufferBase {
" to "
,
size
,
" is illegal"
);
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_NOT_IMPLEMENTED
;
}
void
Copy
(
void
*
src
,
index_t
offset
,
index_t
length
)
{
MACE_UNUSED
(
src
);
MACE_UNUSED
(
offset
);
MACE_UNUSED
(
length
);
MACE_NOT_IMPLEMENTED
;
}
index_t
offset
()
const
{
return
offset_
;
}
...
...
mace/core/net.cc
浏览文件 @
6376fe6f
...
...
@@ -14,6 +14,7 @@
#include <utility>
#include "mace/core/macros.h"
#include "mace/core/net.h"
#include "mace/utils/memory_logging.h"
#include "mace/utils/timer.h"
...
...
@@ -25,7 +26,10 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
name_
(
net_def
->
name
()),
op_registry_
(
op_registry
)
{}
:
name_
(
net_def
->
name
()),
op_registry_
(
op_registry
)
{
MACE_UNUSED
(
ws
);
MACE_UNUSED
(
type
);
}
SerialNet
::
SerialNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
...
...
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
6376fe6f
...
...
@@ -33,6 +33,7 @@ namespace mace {
namespace
{
#ifndef MACE_ENABLE_OPENMP
int
GetCPUCount
()
{
char
path
[
32
];
int
cpu_count
=
0
;
...
...
@@ -50,12 +51,14 @@ int GetCPUCount() {
cpu_count
++
;
}
}
#endif
int
GetCPUMaxFreq
(
int
cpu_id
)
{
char
path
[
64
];
snprintf
(
path
,
sizeof
(
path
),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq"
,
cpu_id
);
FILE
*
fp
=
fopen
(
path
,
"rb"
);
if
(
!
fp
)
{
LOG
(
WARNING
)
<<
"File: "
<<
path
<<
" not exists."
;
...
...
@@ -63,47 +66,14 @@ int GetCPUMaxFreq(int cpu_id) {
}
int
freq
=
0
;
fscanf
(
fp
,
"%d"
,
&
freq
);
int
items_read
=
fscanf
(
fp
,
"%d"
,
&
freq
);
if
(
items_read
!=
1
)
{
LOG
(
WARNING
)
<<
"Read file: "
<<
path
<<
" failed."
;
}
fclose
(
fp
);
return
freq
;
}
void
SortCPUIdsByMaxFreqAsc
(
std
::
vector
<
int
>
*
cpu_ids
,
int
*
big_core_offset
)
{
MACE_CHECK_NOTNULL
(
cpu_ids
);
int
cpu_count
=
cpu_ids
->
size
();
std
::
vector
<
int
>
cpu_max_freq
;
cpu_max_freq
.
resize
(
cpu_count
);
// set cpu max frequency
for
(
int
i
=
0
;
i
<
cpu_count
;
++
i
)
{
cpu_max_freq
[
i
]
=
GetCPUMaxFreq
(
i
);
(
*
cpu_ids
)[
i
]
=
i
;
}
// sort cpu ids by max frequency asc, bubble sort
for
(
int
i
=
0
;
i
<
cpu_count
-
1
;
++
i
)
{
for
(
int
j
=
i
+
1
;
j
<
cpu_count
;
++
j
)
{
if
(
cpu_max_freq
[
i
]
>
cpu_max_freq
[
j
])
{
int
tmp
=
(
*
cpu_ids
)[
i
];
(
*
cpu_ids
)[
i
]
=
(
*
cpu_ids
)[
j
];
(
*
cpu_ids
)[
j
]
=
tmp
;
tmp
=
cpu_max_freq
[
i
];
cpu_max_freq
[
i
]
=
cpu_max_freq
[
j
];
cpu_max_freq
[
j
]
=
tmp
;
}
}
}
*
big_core_offset
=
0
;
for
(
int
i
=
1
;
i
<
cpu_count
;
++
i
)
{
if
(
cpu_max_freq
[
i
]
>
cpu_max_freq
[
i
-
1
])
{
*
big_core_offset
=
i
;
break
;
}
}
}
void
SetThreadAffinity
(
cpu_set_t
mask
)
{
#if defined(__ANDROID__)
pid_t
pid
=
gettid
();
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
6376fe6f
...
...
@@ -23,6 +23,7 @@
#include <utility>
#include "mace/public/mace_runtime.h"
#include "mace/core/macros.h"
#include "mace/core/file_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/public/mace.h"
...
...
@@ -176,6 +177,8 @@ void OpenCLPrintfCallback(const char *buffer,
size_t
length
,
size_t
final
,
void
*
user_data
)
{
MACE_UNUSED
(
final
);
MACE_UNUSED
(
user_data
);
fwrite
(
buffer
,
1
,
length
,
stdout
);
}
...
...
@@ -218,6 +221,22 @@ void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
// The properties list should be terminated with 0
properties
->
push_back
(
0
);
}
GPUType
ParseGPUType
(
const
std
::
string
&
device_name
)
{
constexpr
const
char
*
kQualcommAdrenoGPUStr
=
"QUALCOMM Adreno(TM)"
;
constexpr
const
char
*
kMaliGPUStr
=
"Mali"
;
constexpr
const
char
*
kPowerVRGPUStr
=
"PowerVR"
;
if
(
device_name
==
kQualcommAdrenoGPUStr
)
{
return
GPUType
::
QUALCOMM_ADRENO
;
}
else
if
(
device_name
.
find
(
kMaliGPUStr
)
!=
std
::
string
::
npos
)
{
return
GPUType
::
MALI
;
}
else
if
(
device_name
.
find
(
kPowerVRGPUStr
)
!=
std
::
string
::
npos
)
{
return
GPUType
::
PowerVR
;
}
else
{
return
GPUType
::
UNKNOWN
;
}
}
}
// namespace
void
OpenCLProfilingTimer
::
StartTiming
()
{}
...
...
@@ -389,11 +408,11 @@ cl::Device &OpenCLRuntime::device() { return *device_; }
cl
::
CommandQueue
&
OpenCLRuntime
::
command_queue
()
{
return
*
command_queue_
;
}
const
uint64_t
OpenCLRuntime
::
device_global_mem_cache_size
()
const
{
uint64_t
OpenCLRuntime
::
device_global_mem_cache_size
()
const
{
return
device_gloabl_mem_cache_size_
;
}
const
uint32_t
OpenCLRuntime
::
device_compute_units
()
const
{
uint32_t
OpenCLRuntime
::
device_compute_units
()
const
{
return
device_compute_units_
;
}
...
...
@@ -597,12 +616,12 @@ uint64_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) {
return
size
;
}
const
bool
OpenCLRuntime
::
IsNonUniformWorkgroupsSupported
()
{
bool
OpenCLRuntime
::
IsNonUniformWorkgroupsSupported
()
const
{
return
(
gpu_type_
==
GPUType
::
QUALCOMM_ADRENO
&&
opencl_version_
==
"2.0"
);
}
const
GPUType
OpenCLRuntime
::
gpu_type
()
const
{
GPUType
OpenCLRuntime
::
gpu_type
()
const
{
return
gpu_type_
;
}
...
...
@@ -610,36 +629,20 @@ const std::string OpenCLRuntime::platform_info() const {
return
platform_info_
;
}
const
GPUType
OpenCLRuntime
::
ParseGPUType
(
const
std
::
string
&
device_name
)
{
constexpr
const
char
*
kQualcommAdrenoGPUStr
=
"QUALCOMM Adreno(TM)"
;
constexpr
const
char
*
kMaliGPUStr
=
"Mali"
;
constexpr
const
char
*
kPowerVRGPUStr
=
"PowerVR"
;
if
(
device_name
==
kQualcommAdrenoGPUStr
)
{
return
GPUType
::
QUALCOMM_ADRENO
;
}
else
if
(
device_name
.
find
(
kMaliGPUStr
)
!=
std
::
string
::
npos
)
{
return
GPUType
::
MALI
;
}
else
if
(
device_name
.
find
(
kPowerVRGPUStr
)
!=
std
::
string
::
npos
)
{
return
GPUType
::
PowerVR
;
}
else
{
return
GPUType
::
UNKNOWN
;
}
}
const
std
::
string
OpenCLRuntime
::
ParseDeviceVersion
(
const
std
::
string
&
device_version
)
{
// OpenCL Device version string format:
// OpenCL<space><major_version.minor_version><space>
\
// OpenCL<space><major_version.minor_version><space>
// <vendor-specific information>
auto
words
=
Split
(
device_version
,
' '
);
return
words
[
1
];
}
const
bool
OpenCLRuntime
::
IsOutOfRangeCheckEnabled
()
const
{
bool
OpenCLRuntime
::
IsOutOfRangeCheckEnabled
()
const
{
return
out_of_range_check_
;
}
const
bool
OpenCLRuntime
::
is_profiling_enabled
()
const
{
bool
OpenCLRuntime
::
is_profiling_enabled
()
const
{
return
is_profiling_enabled_
;
}
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
6376fe6f
...
...
@@ -70,18 +70,18 @@ class OpenCLRuntime {
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
CommandQueue
&
command_queue
();
const
GPUType
gpu_type
()
const
;
GPUType
gpu_type
()
const
;
const
std
::
string
platform_info
()
const
;
const
uint64_t
device_global_mem_cache_size
()
const
;
const
uint32_t
device_compute_units
()
const
;
uint64_t
device_global_mem_cache_size
()
const
;
uint32_t
device_compute_units
()
const
;
void
GetCallStats
(
const
cl
::
Event
&
event
,
CallStats
*
stats
);
uint64_t
GetDeviceMaxWorkGroupSize
();
uint64_t
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
);
uint64_t
GetKernelWaveSize
(
const
cl
::
Kernel
&
kernel
);
const
bool
IsNonUniformWorkgroupsSupported
()
;
const
bool
IsOutOfRangeCheckEnabled
()
const
;
const
bool
is_profiling_enabled
()
const
;
bool
IsNonUniformWorkgroupsSupported
()
const
;
bool
IsOutOfRangeCheckEnabled
()
const
;
bool
is_profiling_enabled
()
const
;
cl
::
Kernel
BuildKernel
(
const
std
::
string
&
program_name
,
const
std
::
string
&
kernel_name
,
...
...
@@ -112,7 +112,6 @@ class OpenCLRuntime {
const
std
::
string
&
built_program_key
,
const
std
::
string
&
build_options_str
,
cl
::
Program
*
program
);
const
GPUType
ParseGPUType
(
const
std
::
string
&
device_name
);
const
std
::
string
ParseDeviceVersion
(
const
std
::
string
&
device_version
);
private:
...
...
mace/kernels/activation.h
浏览文件 @
6376fe6f
...
...
@@ -136,6 +136,7 @@ class ActivationFunctor<DeviceType::CPU, float> {
const
Tensor
*
alpha
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
float
*
input_ptr
=
input
->
data
<
float
>
();
float
*
output_ptr
=
output
->
mutable_data
<
float
>
();
if
(
activation_
==
PRELU
)
{
...
...
mace/kernels/addn.h
浏览文件 @
6376fe6f
...
...
@@ -39,6 +39,7 @@ struct AddNFunctor {
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_tensors
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
output_tensor
->
ResizeLike
(
input_tensors
[
0
]);
index_t
size
=
output_tensor
->
size
();
Tensor
::
MappingGuard
output_map
(
output_tensor
);
...
...
mace/kernels/arm/conv_2d_neon_3x3.cc
浏览文件 @
6376fe6f
...
...
@@ -42,22 +42,25 @@ void Conv2dNeonK3x3S1(const float *input,
for
(
index_t
m
=
0
;
m
<
out_channels
;
m
+=
2
)
{
if
(
m
+
1
<
out_channels
)
{
float
*
out_ptr0_base
=
output
+
b
*
out_batch_size
+
m
*
out_image_size
;
#if defined(MACE_ENABLE_NEON)
float
*
out_ptr1_base
=
output
+
b
*
out_batch_size
+
(
m
+
1
)
*
out_image_size
;
#endif
for
(
index_t
c
=
0
;
c
<
in_channels
;
++
c
)
{
float
*
out_ptr0
=
out_ptr0_base
;
float
*
out_ptr1
=
out_ptr1_base
;
const
float
*
in_ptr0
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
9
+
c
*
9
;
#if defined(MACE_ENABLE_NEON)
float
*
out_ptr1
=
out_ptr1_base
;
const
float
*
in_ptr1
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
1
*
in_width
;
const
float
*
in_ptr2
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
2
*
in_width
;
const
float
*
in_ptr3
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
3
*
in_width
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
9
+
c
*
9
;
const
float
*
filter_ptr1
=
filter
+
(
m
+
1
)
*
in_channels
*
9
+
c
*
9
;
#endif
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
// load filter (2 outch x 3 height x 3 width): vf_outch_height
float32x4_t
vf00
,
vf01
,
vf02
;
...
...
@@ -321,12 +324,14 @@ void Conv2dNeonK3x3S1(const float *input,
const
float
*
in_ptr0
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
#if defined(MACE_ENABLE_NEON)
const
float
*
in_ptr1
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
1
*
in_width
;
const
float
*
in_ptr2
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
2
*
in_width
;
const
float
*
in_ptr3
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
+
3
*
in_width
;
#endif
const
float
*
filter_ptr0
=
filter
+
mm
*
in_channels
*
9
+
c
*
9
;
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
...
...
mace/kernels/arm/conv_2d_neon_5x5.cc
浏览文件 @
6376fe6f
...
...
@@ -121,23 +121,25 @@ void Conv2dNeonK5x5S1(const float *input,
for
(
index_t
m
=
0
;
m
<
out_channels
;
m
+=
4
)
{
if
(
m
+
3
<
out_channels
)
{
float
*
out_ptr0_base
=
output
+
b
*
out_batch_size
+
m
*
out_image_size
;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
float
*
out_ptr1_base
=
output
+
b
*
out_batch_size
+
(
m
+
1
)
*
out_image_size
;
float
*
out_ptr2_base
=
output
+
b
*
out_batch_size
+
(
m
+
2
)
*
out_image_size
;
float
*
out_ptr3_base
=
output
+
b
*
out_batch_size
+
(
m
+
3
)
*
out_image_size
;
#endif
for
(
index_t
c
=
0
;
c
<
in_channels
;
++
c
)
{
const
float
*
in_ptr_base
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
25
+
c
*
25
;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
const
float
*
filter_ptr1
=
filter
+
(
m
+
1
)
*
in_channels
*
25
+
c
*
25
;
const
float
*
filter_ptr2
=
filter
+
(
m
+
2
)
*
in_channels
*
25
+
c
*
25
;
const
float
*
filter_ptr3
=
filter
+
(
m
+
3
)
*
in_channels
*
25
+
c
*
25
;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
+
3
<
out_width
;
w
+=
4
)
{
// input offset
...
...
mace/kernels/arm/conv_2d_neon_7x7.cc
浏览文件 @
6376fe6f
...
...
@@ -198,23 +198,25 @@ void Conv2dNeonK7x7S1(const float *input,
for
(
index_t
m
=
0
;
m
<
out_channels
;
m
+=
4
)
{
if
(
m
+
3
<
out_channels
)
{
float
*
out_ptr0_base
=
output
+
b
*
out_batch_size
+
m
*
out_image_size
;
#if defined(MACE_ENABLE_NEON)
float
*
out_ptr1_base
=
output
+
b
*
out_batch_size
+
(
m
+
1
)
*
out_image_size
;
float
*
out_ptr2_base
=
output
+
b
*
out_batch_size
+
(
m
+
2
)
*
out_image_size
;
float
*
out_ptr3_base
=
output
+
b
*
out_batch_size
+
(
m
+
3
)
*
out_image_size
;
#endif
for
(
index_t
c
=
0
;
c
<
in_channels
;
++
c
)
{
const
float
*
in_ptr_base
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
const
float
*
filter_ptr1
=
filter
+
(
m
+
1
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr2
=
filter
+
(
m
+
2
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr3
=
filter
+
(
m
+
3
)
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
+
3
<
out_width
;
w
+=
4
)
{
// input offset
...
...
@@ -352,23 +354,25 @@ void Conv2dNeonK7x7S2(const float *input,
for
(
index_t
m
=
0
;
m
<
out_channels
;
m
+=
4
)
{
if
(
m
+
3
<
out_channels
)
{
float
*
out_ptr0_base
=
output
+
b
*
out_batch_size
+
m
*
out_image_size
;
#if defined(MACE_ENABLE_NEON)
float
*
out_ptr1_base
=
output
+
b
*
out_batch_size
+
(
m
+
1
)
*
out_image_size
;
float
*
out_ptr2_base
=
output
+
b
*
out_batch_size
+
(
m
+
2
)
*
out_image_size
;
float
*
out_ptr3_base
=
output
+
b
*
out_batch_size
+
(
m
+
3
)
*
out_image_size
;
#endif
for
(
index_t
c
=
0
;
c
<
in_channels
;
++
c
)
{
const
float
*
in_ptr_base
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
const
float
*
filter_ptr1
=
filter
+
(
m
+
1
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr2
=
filter
+
(
m
+
2
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr3
=
filter
+
(
m
+
3
)
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
+
3
<
out_width
;
w
+=
4
)
{
// input offset
...
...
@@ -516,23 +520,25 @@ void Conv2dNeonK7x7S3(const float *input,
for
(
index_t
m
=
0
;
m
<
out_channels
;
m
+=
4
)
{
if
(
m
+
3
<
out_channels
)
{
float
*
out_ptr0_base
=
output
+
b
*
out_batch_size
+
m
*
out_image_size
;
#if defined(MACE_ENABLE_NEON)
float
*
out_ptr1_base
=
output
+
b
*
out_batch_size
+
(
m
+
1
)
*
out_image_size
;
float
*
out_ptr2_base
=
output
+
b
*
out_batch_size
+
(
m
+
2
)
*
out_image_size
;
float
*
out_ptr3_base
=
output
+
b
*
out_batch_size
+
(
m
+
3
)
*
out_image_size
;
#endif
for
(
index_t
c
=
0
;
c
<
in_channels
;
++
c
)
{
const
float
*
in_ptr_base
=
input
+
b
*
in_batch_size
+
c
*
in_image_size
;
const
float
*
filter_ptr0
=
filter
+
m
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
const
float
*
filter_ptr1
=
filter
+
(
m
+
1
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr2
=
filter
+
(
m
+
2
)
*
in_channels
*
49
+
c
*
49
;
const
float
*
filter_ptr3
=
filter
+
(
m
+
3
)
*
in_channels
*
49
+
c
*
49
;
#if defined(MACE_ENABLE_NEON)
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
+
3
<
out_width
;
w
+=
4
)
{
// input offset
...
...
mace/kernels/arm/depthwise_conv2d_neon_3x3.cc
浏览文件 @
6376fe6f
...
...
@@ -17,6 +17,7 @@
#endif
#include "mace/kernels/arm/depthwise_conv2d_neon.h"
#include "mace/core/macros.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -65,6 +66,10 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
const
index_t
valid_w_start
,
const
index_t
valid_w_stop
,
float
*
output
)
{
#if !defined(MACE_ENABLE_NEON)
MACE_UNUSED
(
valid_w_start
);
MACE_UNUSED
(
valid_w_stop
);
#endif
const
index_t
multiplier
=
out_channels
/
in_channels
;
const
index_t
in_image_size
=
in_height
*
in_width
;
const
index_t
out_image_size
=
out_height
*
out_width
;
...
...
@@ -305,6 +310,10 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
const
index_t
valid_w_start
,
const
index_t
valid_w_stop
,
float
*
output
)
{
#if !defined(MACE_ENABLE_NEON)
MACE_UNUSED
(
valid_w_start
);
MACE_UNUSED
(
valid_w_stop
);
#endif
const
index_t
multiplier
=
out_channels
/
in_channels
;
const
index_t
in_image_size
=
in_height
*
in_width
;
const
index_t
out_image_size
=
out_height
*
out_width
;
...
...
mace/kernels/batch_norm.h
浏览文件 @
6376fe6f
...
...
@@ -64,6 +64,7 @@ struct BatchNormFunctor<DeviceType::CPU, float> : BatchNormFunctorBase {
const
float
epsilon
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...
...
mace/kernels/bias_add.h
浏览文件 @
6376fe6f
...
...
@@ -38,6 +38,7 @@ struct BiasAddFunctor<DeviceType::CPU, float> {
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
channels
=
input
->
dim
(
1
);
const
index_t
height
=
input
->
dim
(
2
);
...
...
mace/kernels/buffer_to_image.h
浏览文件 @
6376fe6f
...
...
@@ -39,6 +39,10 @@ struct BufferToImageFunctor : BufferToImageFunctorBase {
const
BufferType
type
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
input
);
MACE_UNUSED
(
type
);
MACE_UNUSED
(
output
);
MACE_UNUSED
(
future
);
MACE_NOT_IMPLEMENTED
;
}
};
...
...
mace/kernels/channel_shuffle.h
浏览文件 @
6376fe6f
...
...
@@ -31,6 +31,7 @@ struct ChannelShuffleFunctor {
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
output
->
ResizeLike
(
input
);
Tensor
::
MappingGuard
logits_guard
(
input
);
...
...
@@ -56,7 +57,7 @@ struct ChannelShuffleFunctor {
index_t
idx
=
c
/
groups_
;
for
(
index_t
hw
=
0
;
hw
<
height
*
width
;
++
hw
)
{
output_base
[
c
*
image_size
+
hw
]
=
input_base
[
(
c
%
groups_
*
channels_per_group
+
c
/
groups_
)
*
image_size
+
hw
];
(
g
*
channels_per_group
+
idx
)
*
image_size
+
hw
];
}
}
}
...
...
mace/kernels/concat.h
浏览文件 @
6376fe6f
...
...
@@ -43,8 +43,9 @@ struct ConcatFunctor : ConcatFunctorBase {
void
operator
()(
const
std
::
vector
<
const
Tensor
*>
&
input_list
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
Tensor
*
input0
=
input_list
.
front
();
const
in
t
inputs_count
=
input_list
.
size
();
const
size_
t
inputs_count
=
input_list
.
size
();
std
::
vector
<
index_t
>
output_shape
(
input0
->
shape
());
index_t
inner_size
=
1
;
...
...
@@ -53,7 +54,7 @@ struct ConcatFunctor : ConcatFunctorBase {
}
std
::
vector
<
index_t
>
outer_sizes
(
inputs_count
,
0
);
outer_sizes
[
0
]
=
input0
->
size
()
/
inner_size
;
for
(
in
t
i
=
1
;
i
<
inputs_count
;
++
i
)
{
for
(
size_
t
i
=
1
;
i
<
inputs_count
;
++
i
)
{
const
Tensor
*
input
=
input_list
[
i
];
MACE_CHECK
(
input
->
dim_size
()
==
input0
->
dim_size
(),
"Ranks of all input tensors must be same."
);
...
...
mace/kernels/conv_2d.h
浏览文件 @
6376fe6f
...
...
@@ -103,8 +103,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
const
index_t
in_batch_size
=
in_channels
*
in_image_size
;
const
index_t
out_batch_size
=
out_channels
*
out_image_size
;
const
index_t
filter_size
=
filter_height
*
filter_width
;
const
index_t
in_tile_size
=
3
*
stride_w
+
(
filter_width
-
1
)
*
dilation_w
+
1
;
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
...
...
@@ -267,6 +265,7 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
...
...
@@ -345,7 +344,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
Tensor
::
MappingGuard
bias_guard
(
bias
);
Tensor
::
MappingGuard
output_guard
(
output
);
auto
input_data
=
input
->
data
<
float
>
();
auto
filter_data
=
filter
->
data
<
float
>
();
auto
bias_data
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
float
>
();
auto
output_data
=
output
->
mutable_data
<
float
>
();
...
...
@@ -719,7 +717,10 @@ struct Conv2dFunctor<DeviceType::GPU, T> : Conv2dFunctorBase {
paddings
,
dilations
,
activation
,
relux_max_limit
)
{}
relux_max_limit
)
{
MACE_UNUSED
(
is_filter_transformed
);
MACE_UNUSED
(
scratch
);
}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
filter
,
...
...
mace/kernels/depth_to_space.h
浏览文件 @
6376fe6f
...
...
@@ -33,6 +33,7 @@ struct DepthToSpaceOpFunctor {
explicit
DepthToSpaceOpFunctor
(
const
int
block_size
,
bool
d2s
)
:
block_size_
(
block_size
),
d2s_
(
d2s
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
int
batch_size
=
input
->
dim
(
0
);
const
int
input_depth
=
input
->
dim
(
1
);
const
int
input_height
=
input
->
dim
(
2
);
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
6376fe6f
...
...
@@ -133,6 +133,7 @@ struct DepthwiseConv2dFunctor<DeviceType::CPU, float>
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
MACE_CHECK_NOTNULL
(
input
);
MACE_CHECK_NOTNULL
(
filter
);
MACE_CHECK_NOTNULL
(
output
);
...
...
mace/kernels/eltwise.h
浏览文件 @
6376fe6f
...
...
@@ -310,6 +310,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
const
Tensor
*
input1
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
bool
swapped
=
false
;
if
(
input1
!=
nullptr
)
{
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
...
...
mace/kernels/fully_connected.h
浏览文件 @
6376fe6f
...
...
@@ -59,6 +59,7 @@ struct FullyConnectedFunctor<DeviceType::CPU, float>: FullyConnectedBase {
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
weight
->
dim
(
0
),
1
,
1
};
output
->
Resize
(
output_shape
);
const
index_t
N
=
output
->
dim
(
0
);
...
...
mace/kernels/gemm.cc
浏览文件 @
6376fe6f
...
...
@@ -135,7 +135,9 @@ inline void GemmTile(const float *A,
const
index_t
stride_k
,
const
index_t
stride_w
,
float
*
C
)
{
#if defined(MACE_ENABLE_NEON)
index_t
h
,
w
,
k
;
#endif
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
for
(
h
=
0
;
h
+
7
<
height
;
h
+=
8
)
{
...
...
@@ -443,6 +445,7 @@ inline void GemmTile(const float *A,
#else
#if defined(MACE_ENABLE_NEON) // armv7
w
=
(
width
>>
2
)
<<
2
;
for
(
h
=
0
;
h
+
3
<
height
;
h
+=
4
)
{
for
(
k
=
0
;
k
+
3
<
K
;
k
+=
4
)
{
const
float
*
a_ptr
=
A
+
(
h
*
stride_k
+
k
);
...
...
@@ -523,8 +526,6 @@ inline void GemmTile(const float *A,
c_ptr2
+=
4
;
c_ptr3
+=
4
;
}
w
=
(
width
>>
2
)
<<
2
;
}
if
(
w
<
width
)
{
const
float
*
a_ptr
=
A
+
(
h
*
stride_k
+
k
);
...
...
mace/kernels/local_response_norm.h
浏览文件 @
6376fe6f
...
...
@@ -42,6 +42,7 @@ struct LocalResponseNormFunctor<DeviceType::CPU, float> {
float
beta
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
channels
=
input
->
dim
(
1
);
const
index_t
height
=
input
->
dim
(
2
);
...
...
mace/kernels/matmul.h
浏览文件 @
6376fe6f
...
...
@@ -42,6 +42,7 @@ struct MatMulFunctor {
const
Tensor
*
B
,
Tensor
*
C
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
C
->
Resize
(
c_shape
);
...
...
@@ -59,14 +60,6 @@ struct MatMulFunctor {
// It is better to use large block size if it fits for fast cache.
// Assume l1 cache size is 32k, we load three blocks at a time (A, B, C),
// the block size should be sqrt(32k / sizeof(T) / 3).
const
index_t
block_size
=
48
;
const
index_t
block_tile_height
=
RoundUpDiv
(
height
,
block_size
);
const
index_t
block_tile_width
=
RoundUpDiv
(
width
,
block_size
);
const
index_t
block_tile_k
=
RoundUpDiv
(
K
,
block_size
);
const
index_t
remain_height
=
height
%
block_size
;
const
index_t
remain_width
=
width
%
block_size
;
const
index_t
remain_k
=
K
%
block_size
;
constexpr
index_t
register_tile_size
=
4
;
memset
(
c_ptr_base
,
0
,
batch
*
height
*
width
*
sizeof
(
T
));
Gemm
(
a_ptr_base
,
b_ptr_base
,
batch
,
height
,
K
,
width
,
c_ptr_base
);
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
6376fe6f
...
...
@@ -36,7 +36,7 @@ void AddNFunctor<DeviceType::GPU, T>::operator()(
auto
runtime
=
OpenCLRuntime
::
Global
();
for
(
in
t
i
=
1
;
i
<
size
;
++
i
)
{
for
(
size_
t
i
=
1
;
i
<
size
;
++
i
)
{
MACE_CHECK_NOTNULL
(
input_tensors
[
i
]);
MACE_CHECK
(
batch
==
input_tensors
[
i
]
->
dim
(
0
));
MACE_CHECK
(
height
==
input_tensors
[
i
]
->
dim
(
1
));
...
...
mace/kernels/opencl/concat.cc
浏览文件 @
6376fe6f
...
...
@@ -136,7 +136,6 @@ static void ConcatN(cl::Kernel *kernel,
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
const
index_t
channel
=
output
->
dim
(
3
);
auto
runtime
=
OpenCLRuntime
::
Global
();
...
...
mace/kernels/opencl/conv_2d_1x1.cc
浏览文件 @
6376fe6f
...
...
@@ -72,6 +72,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
StatsFuture
*
future
,
uint32_t
*
kwg_size
,
std
::
unique_ptr
<
BufferBase
>
*
kernel_error
)
{
MACE_UNUSED
(
padding
);
MACE_UNUSED
(
dilations
);
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
const
index_t
width
=
output
->
dim
(
2
);
...
...
mace/kernels/opencl/depthwise_conv.cc
浏览文件 @
6376fe6f
...
...
@@ -135,7 +135,6 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
*
kernel
));
}
if
(
!
IsVecEqual
(
*
prev_input_shape
,
input
->
shape
()))
{
const
index_t
input_batch
=
input
->
dim
(
0
);
const
index_t
input_height
=
input
->
dim
(
1
);
const
index_t
input_width
=
input
->
dim
(
2
);
...
...
mace/kernels/opencl/eltwise.cc
浏览文件 @
6376fe6f
...
...
@@ -25,6 +25,7 @@ void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0,
const
Tensor
*
input1
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
bool
swapped
=
false
;
if
(
input1
!=
nullptr
)
{
MACE_CHECK
(
input0
->
dim_size
()
==
input1
->
dim_size
())
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
6376fe6f
...
...
@@ -206,17 +206,6 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) {
}
}
std
::
vector
<
uint32_t
>
Default2DLocalWS
(
const
uint32_t
*
gws
,
const
uint32_t
kwg_size
)
{
std
::
vector
<
uint32_t
>
lws
(
3
,
0
);
uint64_t
cache_size
=
OpenCLRuntime
::
Global
()
->
device_global_mem_cache_size
();
uint32_t
base
=
cache_size
/
kBaseGPUMemCacheSize
;
lws
[
0
]
=
std
::
min
<
uint32_t
>
(
base
,
kwg_size
);
lws
[
1
]
=
kwg_size
/
lws
[
1
];
return
lws
;
}
std
::
vector
<
uint32_t
>
Default3DLocalWS
(
const
uint32_t
*
gws
,
const
uint32_t
kwg_size
)
{
std
::
vector
<
uint32_t
>
lws
(
4
,
0
);
...
...
mace/kernels/opencl/helper.h
浏览文件 @
6376fe6f
...
...
@@ -19,6 +19,7 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/macros.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/types.h"
...
...
@@ -95,6 +96,7 @@ bool IsVecEqual(const std::vector<T> &input0,
template
<
typename
T
>
void
AppendToStream
(
std
::
stringstream
*
ss
,
const
std
::
string
&
delimiter
,
T
v
)
{
MACE_UNUSED
(
delimiter
);
(
*
ss
)
<<
v
;
}
...
...
@@ -114,8 +116,6 @@ std::string Concat(Args... args) {
return
ss
.
str
();
}
std
::
vector
<
uint32_t
>
Default2DLocalWS
(
const
uint32_t
*
gws
,
const
uint32_t
kwg_size
);
std
::
vector
<
uint32_t
>
Default3DLocalWS
(
const
uint32_t
*
gws
,
const
uint32_t
kwg_size
);
}
// namespace kernels
...
...
mace/kernels/opencl/matmul.cc
浏览文件 @
6376fe6f
...
...
@@ -25,6 +25,7 @@ void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A,
const
Tensor
*
B
,
Tensor
*
C
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
c_shape
=
{
A
->
dim
(
0
),
A
->
dim
(
1
),
B
->
dim
(
2
),
1
};
std
::
vector
<
size_t
>
c_image_shape
;
CalImage2DShape
(
c_shape
,
BufferType
::
IN_OUT_HEIGHT
,
&
c_image_shape
);
...
...
mace/kernels/opencl/out_of_range_check_test.cc
浏览文件 @
6376fe6f
...
...
@@ -24,7 +24,7 @@ namespace mace {
namespace
kernels
{
namespace
{
const
bool
BufferToImageOpImpl
(
Tensor
*
buffer
,
bool
BufferToImageOpImpl
(
Tensor
*
buffer
,
Tensor
*
image
,
const
std
::
vector
<
size_t
>
&
image_shape
)
{
std
::
unique_ptr
<
BufferBase
>
kernel_error
;
...
...
@@ -149,7 +149,7 @@ TEST(OutOfRangeCheckTest, RandomTest) {
ASSERT_FALSE
(
BufferToImageOpImpl
(
buffer
,
image
,
image_shape
));
std
::
vector
<
size_t
>
overflow_image_shape
=
image_shape
;
for
(
in
t
i
=
0
;
i
<
overflow_image_shape
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
overflow_image_shape
.
size
();
++
i
)
{
overflow_image_shape
[
i
]
+=
1
;
}
ASSERT_TRUE
(
BufferToImageOpImpl
(
buffer
,
image
,
overflow_image_shape
));
...
...
mace/kernels/opencl/pad.cc
浏览文件 @
6376fe6f
...
...
@@ -25,7 +25,8 @@ void PadFunctor<DeviceType::GPU, T>::operator()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_CHECK
(
this
->
paddings_
.
size
()
==
(
input
->
dim_size
()
*
2
));
MACE_CHECK
(
this
->
paddings_
.
size
()
==
static_cast
<
size_t
>
((
input
->
dim_size
()
*
2
)));
MACE_CHECK
((
this
->
paddings_
[
0
]
==
0
)
&&
(
this
->
paddings_
[
1
]
==
0
)
&&
(
this
->
paddings_
[
6
]
==
0
)
&&
(
this
->
paddings_
[
7
]
==
0
))
<<
"Mace only support height/width dimension now"
;
...
...
mace/kernels/opencl/slice.cc
浏览文件 @
6376fe6f
...
...
@@ -75,7 +75,7 @@ void SliceFunctor<DeviceType::GPU, T>::operator()(
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
gws
,
kwg_size_
);
cl
::
Event
event
;
CallStats
call_stats
{
INT64_MAX
,
0
};
for
(
in
t
i
=
0
;
i
<
outputs_count
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
outputs_count
;
++
i
)
{
uint32_t
idx
=
0
;
if
(
runtime
->
IsOutOfRangeCheckEnabled
())
{
kernel_
.
setArg
(
idx
++
,
...
...
mace/kernels/pad.h
浏览文件 @
6376fe6f
...
...
@@ -47,7 +47,9 @@ struct PadFunctor : public PadFunctorBase {
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_CHECK
(
this
->
paddings_
.
size
()
==
(
input
->
dim_size
()
*
2
));
MACE_UNUSED
(
future
);
MACE_CHECK
(
this
->
paddings_
.
size
()
==
static_cast
<
size_t
>
(
input
->
dim_size
())
*
2
);
auto
input_shape
=
input
->
shape
();
output
->
Resize
({
input_shape
[
0
]
+
this
->
paddings_
[
0
]
+
this
->
paddings_
[
1
],
input_shape
[
1
]
+
this
->
paddings_
[
2
]
+
this
->
paddings_
[
3
],
...
...
mace/kernels/pooling.h
浏览文件 @
6376fe6f
...
...
@@ -173,6 +173,7 @@ struct PoolingFunctor<DeviceType::CPU, float>: PoolingFunctorBase {
void
operator
()(
const
Tensor
*
input_tensor
,
Tensor
*
output_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
index_t
>
filter_shape
=
{
input_tensor
->
dim
(
1
),
input_tensor
->
dim
(
1
),
kernels_
[
0
],
kernels_
[
1
]};
...
...
mace/kernels/proposal.h
浏览文件 @
6376fe6f
...
...
@@ -92,7 +92,7 @@ inline std::vector<int> nms(const float *bboxes_ptr,
for
(
int
i
=
0
;
i
<
num_bboxes
;
++
i
)
{
if
(
suppressed
[
i
]
==
1
)
continue
;
keep
.
push_back
(
i
);
if
(
keep
.
size
()
>=
post_nms_top_n
)
break
;
if
(
keep
.
size
()
>=
static_cast
<
size_t
>
(
post_nms_top_n
)
)
break
;
int
coord_idx
=
i
<<
2
;
const
float
x1
=
bboxes_ptr
[
coord_idx
];
const
float
y1
=
bboxes_ptr
[
coord_idx
+
1
];
...
...
@@ -141,10 +141,11 @@ struct ProposalFunctor {
const
Tensor
*
img_info_tensor
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
MACE_CHECK
(
rpn_cls_prob
->
dim
(
1
)
==
rpn_bbox_pred
->
dim
(
1
)
&&
rpn_cls_prob
->
dim
(
2
)
==
rpn_bbox_pred
->
dim
(
2
));
MACE_CHECK
((
rpn_cls_prob
->
dim
(
3
)
/
2
==
rpn_bbox_pred
->
dim
(
3
)
/
4
)
&&
(
rpn_cls_prob
->
dim
(
3
)
/
2
==
anchors_
.
size
()));
(
static_cast
<
size_t
>
(
rpn_cls_prob
->
dim
(
3
)
/
2
)
==
anchors_
.
size
()));
const
float
*
img_info
=
img_info_tensor
->
data
<
float
>
();
const
int
im_height
=
static_cast
<
int
>
(
img_info
[
0
]
-
1
);
const
int
im_width
=
static_cast
<
int
>
(
img_info
[
1
]
-
1
);
...
...
mace/kernels/psroi_align.h
浏览文件 @
6376fe6f
...
...
@@ -38,6 +38,7 @@ struct PSROIAlignFunctor {
const
Tensor
*
rois
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
int
height
=
static_cast
<
int
>
(
input
->
dim
(
1
));
const
int
width
=
static_cast
<
int
>
(
input
->
dim
(
2
));
const
int
channels
=
static_cast
<
int
>
(
input
->
dim
(
3
));
...
...
mace/kernels/quantize.h
浏览文件 @
6376fe6f
...
...
@@ -81,6 +81,7 @@ struct QuantizeFunctor<CPU, uint8_t> {
Tensor
*
out_min
,
Tensor
*
out_max
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
in_min_data
=
in_min
->
data
<
float
>
()[
0
];
const
float
in_max_data
=
in_max
->
data
<
float
>
()[
0
];
...
...
@@ -109,6 +110,7 @@ struct DequantizeFunctor<CPU, uint8_t> {
const
Tensor
*
in_max
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
uint8_t
*
input_data
=
input
->
data
<
uint8_t
>
();
const
float
in_min_data
=
in_min
->
data
<
float
>
()[
0
];
const
float
in_max_data
=
in_max
->
data
<
float
>
()[
0
];
...
...
@@ -137,6 +139,7 @@ struct RequantizeFunctor<CPU, uint8_t> {
Tensor
*
out_min
,
Tensor
*
out_max
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
int
*
input_data
=
input
->
data
<
int
>
();
const
float
in_min_data
=
in_min
->
data
<
float
>
()[
0
];
const
float
in_max_data
=
in_max
->
data
<
float
>
()[
0
];
...
...
mace/kernels/reshape.h
浏览文件 @
6376fe6f
...
...
@@ -35,6 +35,7 @@ struct ReshapeFunctor {
const
std
::
vector
<
index_t
>
&
out_shape
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
output
->
ResizeWithBuffer
(
out_shape
,
input
->
UnderlyingBuffer
());
}
};
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
6376fe6f
...
...
@@ -137,6 +137,7 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
:
ResizeBilinearFunctorBase
(
size
,
align_corners
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
channels
=
input
->
dim
(
1
);
const
index_t
in_height
=
input
->
dim
(
2
);
...
...
mace/kernels/slice.h
浏览文件 @
6376fe6f
...
...
@@ -44,6 +44,7 @@ struct SliceFunctor : SliceFunctorBase {
void
operator
()(
const
Tensor
*
input
,
const
std
::
vector
<
Tensor
*>
&
output_list
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
input_channels
=
input
->
dim
(
axis_
);
const
size_t
outputs_count
=
output_list
.
size
();
const
index_t
output_channels
=
input_channels
/
outputs_count
;
...
...
mace/kernels/softmax.h
浏览文件 @
6376fe6f
...
...
@@ -39,6 +39,7 @@ struct SoftmaxFunctor;
template
<
>
struct
SoftmaxFunctor
<
DeviceType
::
CPU
,
float
>
{
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
class_count
=
input
->
dim
(
1
);
const
index_t
class_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
...
...
mace/kernels/space_to_batch.h
浏览文件 @
6376fe6f
...
...
@@ -53,6 +53,10 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase {
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
space_tensor
);
MACE_UNUSED
(
output_shape
);
MACE_UNUSED
(
batch_tensor
);
MACE_UNUSED
(
future
);
MACE_NOT_IMPLEMENTED
;
}
};
...
...
mace/kernels/transpose.h
浏览文件 @
6376fe6f
...
...
@@ -30,6 +30,7 @@ struct TransposeFunctor {
explicit
TransposeFunctor
(
const
std
::
vector
<
int
>
&
dims
)
:
dims_
(
dims
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
Tensor
::
MappingGuard
input_guard
(
input
);
Tensor
::
MappingGuard
output_guard
(
output
);
const
std
::
vector
<
index_t
>
&
input_shape
=
input
->
shape
();
...
...
mace/kernels/winograd_transform.h
浏览文件 @
6376fe6f
...
...
@@ -51,6 +51,9 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
:
WinogradTransformFunctorBase
(
padding_type
,
paddings
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
input
);
MACE_UNUSED
(
output
);
MACE_UNUSED
(
future
);
MACE_NOT_IMPLEMENTED
;
}
};
...
...
@@ -105,6 +108,9 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
const
Tensor
*
bias
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
input
);
MACE_UNUSED
(
bias
);
MACE_UNUSED
(
output
);
MACE_NOT_IMPLEMENTED
;
}
};
...
...
mace/ops/batch_to_space.cc
浏览文件 @
6376fe6f
...
...
@@ -29,6 +29,8 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/ops/batch_to_space.h
浏览文件 @
6376fe6f
...
...
@@ -38,7 +38,7 @@ class BatchToSpaceNDOp : public Operator<D, T> {
Tensor
*
space_tensor
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
batch_tensor
,
space_tensor
,
output_shape
.
data
());
CalculateOutputShape
(
batch_tensor
,
output_shape
.
data
());
functor_
(
space_tensor
,
output_shape
,
const_cast
<
Tensor
*>
(
batch_tensor
),
future
);
return
true
;
...
...
@@ -46,7 +46,6 @@ class BatchToSpaceNDOp : public Operator<D, T> {
private:
inline
void
CalculateOutputShape
(
const
Tensor
*
input_tensor
,
Tensor
*
output
,
index_t
*
output_shape
)
{
auto
crops
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"crops"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
...
...
mace/ops/channel_shuffle.h
浏览文件 @
6376fe6f
...
...
@@ -45,7 +45,6 @@ class ChannelShuffleOp : public Operator<D, T> {
MACE_CHECK
(
channels
%
group_
==
0
,
"input channels must be an integral multiple of group. "
,
input
->
dim
(
3
));
int
channels_per_group
=
channels
/
group_
;
functor_
(
input
,
output
,
future
);
return
true
;
...
...
mace/ops/folded_batch_norm_test.cc
浏览文件 @
6376fe6f
...
...
@@ -30,7 +30,7 @@ void CalculateScaleOffset(const std::vector<float> &gamma,
std
::
vector
<
float
>
*
scale
,
std
::
vector
<
float
>
*
offset
)
{
size_t
size
=
gamma
.
size
();
for
(
in
t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
size
;
++
i
)
{
(
*
scale
)[
i
]
=
gamma
[
i
]
/
std
::
sqrt
(
var
[
i
]
+
epsilon
);
(
*
offset
)[
i
]
=
beta
[
i
]
-
mean
[
i
]
*
(
*
scale
)[
i
];
}
...
...
mace/ops/proposal_test.cc
浏览文件 @
6376fe6f
...
...
@@ -45,7 +45,7 @@ TEST_F(ProposalOpTest, CPUSimple) {
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
scores
(
height
*
width
*
18
);
for
(
in
t
i
=
0
;
i
<
scores
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
scores
.
size
();
++
i
)
{
scores
[
i
]
=
i
;
}
...
...
mace/ops/quantize_test.cc
浏览文件 @
6376fe6f
...
...
@@ -95,8 +95,6 @@ TEST_F(QuantizeTest, TestQuantizeTrend) {
net
.
RunOp
();
auto
output
=
net
.
GetTensor
(
"Output"
);
auto
output_min
=
net
.
GetTensor
(
"OutputMin"
);
auto
output_max
=
net
.
GetTensor
(
"OutputMax"
);
const
uint8_t
*
output_data
=
net
.
GetTensor
(
"Output"
)
->
data
<
uint8_t
>
();
for
(
int
i
=
1
;
i
<
output
->
size
();
++
i
)
{
...
...
mace/ops/space_to_batch.cc
浏览文件 @
6376fe6f
...
...
@@ -30,6 +30,8 @@ void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/ops/space_to_batch.h
浏览文件 @
6376fe6f
...
...
@@ -39,7 +39,7 @@ class SpaceToBatchNDOp : public Operator<D, T> {
Tensor
*
batch_tensor
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
space_tensor
,
batch_tensor
,
output_shape
.
data
());
CalculateOutputShape
(
space_tensor
,
output_shape
.
data
());
functor_
(
const_cast
<
Tensor
*>
(
space_tensor
),
output_shape
,
batch_tensor
,
future
);
return
true
;
...
...
@@ -47,7 +47,6 @@ class SpaceToBatchNDOp : public Operator<D, T> {
private:
inline
void
CalculateOutputShape
(
const
Tensor
*
input_tensor
,
Tensor
*
output
,
index_t
*
output_shape
)
{
auto
paddings
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"paddings"
,
{
0
,
0
,
0
,
0
});
...
...
mace/ops/transpose.h
浏览文件 @
6376fe6f
...
...
@@ -35,11 +35,11 @@ class TransposeOp : public Operator<D, T> {
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
const
std
::
vector
<
index_t
>
&
input_shape
=
input
->
shape
();
MACE_CHECK
(
input_shape
.
size
()
==
4
&&
dims_
.
size
()
==
4
||
input_shape
.
size
()
==
2
&&
dims_
.
size
()
==
2
,
MACE_CHECK
(
(
input_shape
.
size
()
==
4
&&
dims_
.
size
()
==
4
)
||
(
input_shape
.
size
()
==
2
&&
dims_
.
size
()
==
2
)
,
"rank should be 2 or 4"
);
std
::
vector
<
index_t
>
output_shape
;
for
(
in
t
i
=
0
;
i
<
dims_
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
dims_
.
size
();
++
i
)
{
output_shape
.
push_back
(
input_shape
[
dims_
[
i
]]);
}
output
->
Resize
(
output_shape
);
...
...
mace/ops/winograd_inverse_transform.cc
浏览文件 @
6376fe6f
...
...
@@ -30,6 +30,8 @@ void Register_WinogradInverseTransform(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
WinogradInverseTransformOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/ops/winograd_transform.cc
浏览文件 @
6376fe6f
...
...
@@ -30,6 +30,8 @@ void Register_WinogradTransform(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
WinogradTransformOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/tools/validation/BUILD
浏览文件 @
6376fe6f
...
...
@@ -4,13 +4,15 @@ load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
cc_binary
(
name
=
"mace_run"
,
srcs
=
[
"mace_run.cc"
],
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
]),
linkstatic
=
1
,
copts
=
if_android
([
"-DMACE_ENABLE_OPENCL"
]),
deps
=
[
"//external:gflags_nothreads"
,
"//mace/codegen:generated_models"
,
"//mace/codegen:generated_mace_engine_factory"
,
"//mace/core:core"
,
"//mace/codegen:generated_models"
,
"//mace/core"
,
],
)
mace/tools/validation/mace_run.cc
浏览文件 @
6376fe6f
...
...
@@ -385,7 +385,7 @@ int Main(int argc, char **argv) {
ParseShape
(
output_shapes
[
i
],
&
output_shape_vec
[
i
]);
}
bool
ret
;
bool
ret
=
false
;
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
FLAGS_restart_round
;
++
i
)
{
VLOG
(
0
)
<<
"restart round "
<<
i
;
...
...
@@ -395,9 +395,8 @@ int Main(int argc, char **argv) {
}
if
(
ret
)
{
return
0
;
}
else
{
return
-
1
;
}
return
-
1
;
}
}
// namespace validation
...
...
mace/utils/BUILD
浏览文件 @
6376fe6f
...
...
@@ -12,20 +12,18 @@ load("//mace:mace.bzl", "if_android")
cc_library
(
name
=
"utils"
,
srcs
=
[
"command_line_flags.cc"
,
"logging.cc"
,
"string_util.cc"
,
],
hdrs
=
[
"command_line_flags.h"
,
"env_time.h"
,
"logging.h"
,
"memory_logging.h"
,
"rwlock.h"
,
"string_util.h"
,
"timer.h"
,
"tuner.h"
,
"utils.h"
,
"rwlock.h"
,
],
linkopts
=
if_android
([
"-llog"
,
...
...
@@ -35,24 +33,6 @@ cc_library(
],
)
cc_test
(
name
=
"utils_test"
,
testonly
=
1
,
srcs
=
[
"utils_test.cc"
,
],
linkopts
=
if_android
([
"-pie"
,
"-lm"
,
]),
linkstatic
=
1
,
deps
=
[
":utils"
,
"@gtest//:gtest"
,
"@gtest//:gtest_main"
,
],
)
cc_library
(
name
=
"utils_dev"
,
srcs
=
[
...
...
mace/utils/command_line_flags.cc
已删除
100644 → 0
浏览文件 @
22676b19
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/utils/command_line_flags.h"
#include <cstring>
#include <iomanip>
#include "mace/utils/logging.h"
namespace
mace
{
namespace
utils
{
bool
StringConsume
(
const
std
::
string
&
x
,
std
::
string
*
arg
)
{
MACE_CHECK_NOTNULL
(
arg
);
if
((
arg
->
size
()
>=
x
.
size
())
&&
(
memcmp
(
arg
->
data
(),
x
.
data
(),
x
.
size
())
==
0
))
{
*
arg
=
arg
->
substr
(
x
.
size
());
return
true
;
}
return
false
;
}
bool
ParseStringFlag
(
std
::
string
arg
,
std
::
string
flag
,
std
::
string
*
dst
,
bool
*
value_parsing_ok
)
{
*
value_parsing_ok
=
true
;
if
(
StringConsume
(
"--"
,
&
arg
)
&&
StringConsume
(
flag
,
&
arg
)
&&
StringConsume
(
"="
,
&
arg
))
{
*
dst
=
arg
;
return
true
;
}
return
false
;
}
bool
ParseInt32Flag
(
std
::
string
arg
,
std
::
string
flag
,
int32_t
*
dst
,
bool
*
value_parsing_ok
)
{
*
value_parsing_ok
=
true
;
if
(
StringConsume
(
"--"
,
&
arg
)
&&
StringConsume
(
flag
,
&
arg
)
&&
StringConsume
(
"="
,
&
arg
))
{
char
extra
;
if
(
sscanf
(
arg
.
data
(),
"%d%c"
,
dst
,
&
extra
)
!=
1
)
{
LOG
(
ERROR
)
<<
"Couldn't interpret value "
<<
arg
<<
" for flag "
<<
flag
<<
"."
;
*
value_parsing_ok
=
false
;
}
return
true
;
}
return
false
;
}
bool
ParseInt64Flag
(
std
::
string
arg
,
std
::
string
flag
,
int64_t
*
dst
,
bool
*
value_parsing_ok
)
{
*
value_parsing_ok
=
true
;
if
(
StringConsume
(
"--"
,
&
arg
)
&&
StringConsume
(
flag
,
&
arg
)
&&
StringConsume
(
"="
,
&
arg
))
{
char
extra
;
if
(
sscanf
(
arg
.
data
(),
"%lld%c"
,
dst
,
&
extra
)
!=
1
)
{
LOG
(
ERROR
)
<<
"Couldn't interpret value "
<<
arg
<<
" for flag "
<<
flag
<<
"."
;
*
value_parsing_ok
=
false
;
}
return
true
;
}
return
false
;
}
bool
ParseBoolFlag
(
std
::
string
arg
,
std
::
string
flag
,
bool
*
dst
,
bool
*
value_parsing_ok
)
{
*
value_parsing_ok
=
true
;
if
(
StringConsume
(
"--"
,
&
arg
)
&&
StringConsume
(
flag
,
&
arg
))
{
if
(
arg
.
empty
())
{
*
dst
=
true
;
return
true
;
}
if
(
arg
==
"=true"
)
{
*
dst
=
true
;
return
true
;
}
else
if
(
arg
==
"=false"
)
{
*
dst
=
false
;
return
true
;
}
else
{
LOG
(
ERROR
)
<<
"Couldn't interpret value "
<<
arg
<<
" for flag "
<<
flag
<<
"."
;
*
value_parsing_ok
=
false
;
return
true
;
}
}
return
false
;
}
bool
ParseFloatFlag
(
std
::
string
arg
,
std
::
string
flag
,
float
*
dst
,
bool
*
value_parsing_ok
)
{
*
value_parsing_ok
=
true
;
if
(
StringConsume
(
"--"
,
&
arg
)
&&
StringConsume
(
flag
,
&
arg
)
&&
StringConsume
(
"="
,
&
arg
))
{
char
extra
;
if
(
sscanf
(
arg
.
data
(),
"%f%c"
,
dst
,
&
extra
)
!=
1
)
{
LOG
(
ERROR
)
<<
"Couldn't interpret value "
<<
arg
<<
" for flag "
<<
flag
<<
"."
;
*
value_parsing_ok
=
false
;
}
return
true
;
}
return
false
;
}
}
// namespace utils
Flag
::
Flag
(
const
char
*
name
,
int
*
dst
,
const
std
::
string
&
usage_text
)
:
name_
(
name
),
type_
(
TYPE_INT
),
int_value_
(
dst
),
usage_text_
(
usage_text
)
{}
Flag
::
Flag
(
const
char
*
name
,
int64_t
*
dst
,
const
std
::
string
&
usage_text
)
:
name_
(
name
),
type_
(
TYPE_INT64
),
int64_value_
(
dst
),
usage_text_
(
usage_text
)
{}
Flag
::
Flag
(
const
char
*
name
,
bool
*
dst
,
const
std
::
string
&
usage_text
)
:
name_
(
name
),
type_
(
TYPE_BOOL
),
bool_value_
(
dst
),
usage_text_
(
usage_text
)
{}
Flag
::
Flag
(
const
char
*
name
,
std
::
string
*
dst
,
const
std
::
string
&
usage_text
)
:
name_
(
name
),
type_
(
TYPE_STRING
),
string_value_
(
dst
),
usage_text_
(
usage_text
)
{}
Flag
::
Flag
(
const
char
*
name
,
float
*
dst
,
const
std
::
string
&
usage_text
)
:
name_
(
name
),
type_
(
TYPE_FLOAT
),
float_value_
(
dst
),
usage_text_
(
usage_text
)
{}
bool
Flag
::
Parse
(
std
::
string
arg
,
bool
*
value_parsing_ok
)
const
{
bool
result
=
false
;
if
(
type_
==
TYPE_INT
)
{
result
=
utils
::
ParseInt32Flag
(
arg
,
name_
,
int_value_
,
value_parsing_ok
);
}
else
if
(
type_
==
TYPE_INT64
)
{
result
=
utils
::
ParseInt64Flag
(
arg
,
name_
,
int64_value_
,
value_parsing_ok
);
}
else
if
(
type_
==
TYPE_BOOL
)
{
result
=
utils
::
ParseBoolFlag
(
arg
,
name_
,
bool_value_
,
value_parsing_ok
);
}
else
if
(
type_
==
TYPE_STRING
)
{
result
=
utils
::
ParseStringFlag
(
arg
,
name_
,
string_value_
,
value_parsing_ok
);
}
else
if
(
type_
==
TYPE_FLOAT
)
{
result
=
utils
::
ParseFloatFlag
(
arg
,
name_
,
float_value_
,
value_parsing_ok
);
}
return
result
;
}
/*static*/
bool
Flags
::
Parse
(
int
*
argc
,
char
**
argv
,
const
std
::
vector
<
Flag
>
&
flag_list
)
{
bool
result
=
true
;
std
::
vector
<
char
*>
unknown_flags
;
for
(
int
i
=
1
;
i
<
*
argc
;
++
i
)
{
if
(
std
::
string
(
argv
[
i
])
==
"--"
)
{
while
(
i
<
*
argc
)
{
unknown_flags
.
push_back
(
argv
[
i
]);
++
i
;
}
break
;
}
bool
was_found
=
false
;
for
(
const
Flag
&
flag
:
flag_list
)
{
bool
value_parsing_ok
;
was_found
=
flag
.
Parse
(
argv
[
i
],
&
value_parsing_ok
);
if
(
!
value_parsing_ok
)
{
result
=
false
;
}
if
(
was_found
)
{
break
;
}
}
if
(
!
was_found
)
{
unknown_flags
.
push_back
(
argv
[
i
]);
}
}
// Passthrough any extra flags.
int
dst
=
1
;
// Skip argv[0]
for
(
char
*
f
:
unknown_flags
)
{
argv
[
dst
++
]
=
f
;
}
argv
[
dst
++
]
=
nullptr
;
*
argc
=
unknown_flags
.
size
()
+
1
;
return
result
&&
(
*
argc
<
2
||
strcmp
(
argv
[
1
],
"--help"
)
!=
0
);
}
std
::
string
Flags
::
Usage
(
const
std
::
string
&
cmdline
,
const
std
::
vector
<
Flag
>
&
flag_list
)
{
std
::
stringstream
usage_text
;
usage_text
<<
"usage: "
<<
cmdline
<<
std
::
endl
;
if
(
!
flag_list
.
empty
())
{
usage_text
<<
"Flags: "
<<
std
::
endl
;
}
for
(
const
Flag
&
flag
:
flag_list
)
{
usage_text
<<
"
\t
"
<<
std
::
left
<<
std
::
setw
(
30
)
<<
flag
.
name_
;
usage_text
<<
flag
.
usage_text_
<<
std
::
endl
;
}
return
usage_text
.
str
();
}
}
// namespace mace
mace/utils/command_line_flags.h
已删除
100644 → 0
浏览文件 @
22676b19
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_UTILS_COMMAND_LINE_FLAGS_H_
#define MACE_UTILS_COMMAND_LINE_FLAGS_H_
#include <string>
#include <vector>
namespace
mace
{
class
Flag
{
public:
Flag
(
const
char
*
name
,
int
*
dst1
,
const
std
::
string
&
usage_text
);
Flag
(
const
char
*
name
,
int64_t
*
dst1
,
const
std
::
string
&
usage_text
);
Flag
(
const
char
*
name
,
bool
*
dst
,
const
std
::
string
&
usage_text
);
Flag
(
const
char
*
name
,
std
::
string
*
dst
,
const
std
::
string
&
usage_text
);
Flag
(
const
char
*
name
,
float
*
dst
,
const
std
::
string
&
usage_text
);
private:
friend
class
Flags
;
bool
Parse
(
std
::
string
arg
,
bool
*
value_parsing_ok
)
const
;
std
::
string
name_
;
enum
{
TYPE_INT
,
TYPE_INT64
,
TYPE_BOOL
,
TYPE_STRING
,
TYPE_FLOAT
}
type_
;
int
*
int_value_
;
int64_t
*
int64_value_
;
bool
*
bool_value_
;
std
::
string
*
string_value_
;
float
*
float_value_
;
std
::
string
usage_text_
;
};
class
Flags
{
public:
// Parse the command line represented by argv[0, ..., (*argc)-1] to find flag
// instances matching flags in flaglist[]. Update the variables associated
// with matching flags, and remove the matching arguments from (*argc, argv).
// Return true iff all recognized flag values were parsed correctly, and the
// first remaining argument is not "--help".
static
bool
Parse
(
int
*
argc
,
char
**
argv
,
const
std
::
vector
<
Flag
>
&
flag_list
);
// Return a usage message with command line cmdline, and the
// usage_text strings in flag_list[].
static
std
::
string
Usage
(
const
std
::
string
&
cmdline
,
const
std
::
vector
<
Flag
>
&
flag_list
);
};
}
// namespace mace
#endif // MACE_UTILS_COMMAND_LINE_FLAGS_H_
mace/utils/string_util.h
浏览文件 @
6376fe6f
...
...
@@ -58,7 +58,7 @@ std::string MakeString(const std::vector<T> &args) {
std
::
stringstream
ss
;
ss
<<
"["
;
const
size_t
size
=
args
.
size
();
for
(
in
t
i
=
0
;
i
<
size
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
size
;
++
i
)
{
ss
<<
args
[
i
];
if
(
i
<
size
-
1
)
{
ss
<<
", "
;
...
...
mace/utils/tuner.h
浏览文件 @
6376fe6f
...
...
@@ -146,7 +146,7 @@ class Tuner {
for
(
iter
=
0
;
iter
<
num_runs
;
++
iter
)
{
res
=
func
(
params
,
timer
,
tuning_result
);
total_time_us
+=
timer
->
AccumulatedMicros
();
if
(
iter
>=
1
&&
total_time_us
>
100000
||
total_time_us
>
200000
)
{
if
(
(
iter
>=
1
&&
total_time_us
>
100000
)
||
total_time_us
>
200000
)
{
++
iter
;
break
;
}
...
...
@@ -165,7 +165,7 @@ class Tuner {
std
::
vector
<
param_type
>
*
)
>
&
func
,
Timer
*
timer
,
std
::
vector
<
param_type
>
*
opt_params
)
{
RetType
res
;
RetType
res
=
0
;
double
opt_time
=
std
::
numeric_limits
<
double
>::
max
();
auto
params
=
param_generator
();
std
::
vector
<
param_type
>
tuning_result
;
...
...
mace/utils/tuner_production.cc
浏览文件 @
6376fe6f
...
...
@@ -22,6 +22,7 @@ namespace mace {
bool
GetTuningParams
(
const
char
*
path
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
unsigned
int
>>
*
param_table
)
{
(
void
)(
path
);
extern
const
std
::
map
<
std
::
string
,
std
::
vector
<
unsigned
int
>>
kTuningParamsData
;
for
(
auto
it
=
kTuningParamsData
.
begin
();
it
!=
kTuningParamsData
.
end
();
...
...
mace/utils/tuner_test.cc
浏览文件 @
6376fe6f
...
...
@@ -54,7 +54,7 @@ TEST_F(TunerTest, SimpleRun) {
}
TEST_F
(
TunerTest
,
SimpleTune
)
{
int
expect
=
3
;
unsigned
int
expect
=
3
;
auto
TunerFunc
=
[
&
](
const
std
::
vector
<
unsigned
int
>
&
params
,
Timer
*
timer
,
std
::
vector
<
uint32_t
>
*
tuning_result
)
->
int
{
int
res
=
0
;
...
...
tools/sh_commands.py
浏览文件 @
6376fe6f
...
...
@@ -277,7 +277,7 @@ def bazel_build(target,
stdout_buff
=
[]
process_output
=
make_output_processor
(
stdout_buff
)
if
abi
==
"host"
:
p
=
sh
.
bazel
(
bazel_args
=
(
"build"
,
"-c"
,
"opt"
,
...
...
@@ -287,12 +287,17 @@ def bazel_build(target,
target
,
"--copt=-std=c++11"
,
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1"
,
"--copt=-Werror=return-type"
,
"--copt=-Werror"
,
"--copt=-Wextra"
,
"--copt=-Wno-missing-field-initializers"
,
"--copt=-O3"
,
"--define"
,
"openmp=%s"
%
str
(
enable_openmp
).
lower
(),
"--define"
,
"production=%s"
%
str
(
production_mode
).
lower
(),
)
p
=
sh
.
bazel
(
*
bazel_args
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
...
...
@@ -311,7 +316,9 @@ def bazel_build(target,
"--cpu=%s"
%
abi
,
"--copt=-std=c++11"
,
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1"
,
"--copt=-Werror=return-type"
,
"--copt=-Werror"
,
"--copt=-Wextra"
,
"--copt=-Wno-missing-field-initializers"
,
"--copt=-DMACE_OBFUSCATE_LITERALS"
,
"--copt=-O3"
,
"--define"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录