Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
2ea7b6b1
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
2ea7b6b1
编写于
5月 08, 2020
作者:
L
luxuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove openmp, fix scratch bug in micro and opt some codes
N/A Signed-off-by:
N
Luxuhui
<
luxuhui@xiaomi.com
>
上级
ab7499a5
变更
29
显示空白变更内容
内联
并排
Showing
29 changed file
with
242 addition
and
387 deletion
+242
-387
docs/user_guide/advanced_usage.rst
docs/user_guide/advanced_usage.rst
+3
-3
docs/user_guide/basic_usage_cmake.rst
docs/user_guide/basic_usage_cmake.rst
+1
-1
examples/android/build.sh
examples/android/build.sh
+1
-1
examples/android/macelibrary/build.gradle
examples/android/macelibrary/build.gradle
+1
-1
examples/android/macelibrary/src/main/cpp/image_classify.cc
examples/android/macelibrary/src/main/cpp/image_classify.cc
+4
-5
include/mace/public/mace.h
include/mace/public/mace.h
+0
-2
mace/BUILD.bazel
mace/BUILD.bazel
+0
-8
mace/core/BUILD.bazel
mace/core/BUILD.bazel
+1
-5
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+6
-75
mace/core/runtime/cpu/cpu_runtime.h
mace/core/runtime/cpu/cpu_runtime.h
+5
-7
mace/libmace/BUILD.bazel
mace/libmace/BUILD.bazel
+2
-5
mace/mace.bzl
mace/mace.bzl
+83
-89
mace/ops/BUILD.bazel
mace/ops/BUILD.bazel
+6
-19
mace/ops/arm/fp16/gemv.h
mace/ops/arm/fp16/gemv.h
+79
-72
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+1
-1
mace/ops/matmul.cc
mace/ops/matmul.cc
+4
-10
mace/tools/BUILD.bazel
mace/tools/BUILD.bazel
+0
-1
mace/tools/mace_run.cc
mace/tools/mace_run.cc
+4
-4
mace/utils/BUILD.bazel
mace/utils/BUILD.bazel
+1
-4
test/ccbenchmark/BUILD.bazel
test/ccbenchmark/BUILD.bazel
+4
-5
test/ccbenchmark/mace/benchmark_utils/test_benchmark_main.cc
test/ccbenchmark/mace/benchmark_utils/test_benchmark_main.cc
+2
-2
test/ccunit/BUILD.bazel
test/ccunit/BUILD.bazel
+1
-7
test/ccunit/mace/libmace/BUILD.bazel
test/ccunit/mace/libmace/BUILD.bazel
+4
-25
test/ccutils/BUILD.bazel
test/ccutils/BUILD.bazel
+1
-2
tools/converter.py
tools/converter.py
+5
-11
tools/device.py
tools/device.py
+6
-6
tools/python/micro/scratch_computer.py
tools/python/micro/scratch_computer.py
+6
-2
tools/python/transform/onnx_converter.py
tools/python/transform/onnx_converter.py
+11
-9
tools/sh_commands.py
tools/sh_commands.py
+0
-5
未找到文件。
docs/user_guide/advanced_usage.rst
浏览文件 @
2ea7b6b1
...
@@ -364,12 +364,12 @@ Tuning for specific SoC's GPU
...
@@ -364,12 +364,12 @@ Tuning for specific SoC's GPU
.. note::
.. note::
You
shoul
d plug in device(s) with the specific SoC(s).
You
must specify the ``target_socs`` in your YAML file an
d plug in device(s) with the specific SoC(s).
.. code-block:: sh
.. code-block:: sh
python tools/converter.py run --config=/path/to/model_deployment_file.yml
--validate
python tools/converter.py run --config=/path/to/model_deployment_file.yml
The command will generate two files in `build/${library_name}/opencl`, like the following dir-tree.
The command will generate two files in `build/${library_name}/opencl`, like the following dir-tree.
...
@@ -487,7 +487,7 @@ the detailed information is in :doc:`benchmark`.
...
@@ -487,7 +487,7 @@ the detailed information is in :doc:`benchmark`.
-
default
-
default
-
commands
-
commands
-
explanation
-
explanation
*
-
--
omp_
num_threads
*
-
--
num_threads
-
int
-
int
-
-
1
-
-
1
-
``
run
``
-
``
run
``
...
...
docs/user_guide/basic_usage_cmake.rst
浏览文件 @
2ea7b6b1
...
@@ -180,4 +180,4 @@ Please refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following list
...
@@ -180,4 +180,4 @@ Please refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following list
// 5. Run the model
// 5. Run the model
MaceStatus status = engine.Run(inputs, &outputs);
MaceStatus status = engine.Run(inputs, &outputs);
More details are in :doc:`advanced_usage`.
More details are in :doc:`advanced_usage
_cmake
`.
examples/android/build.sh
浏览文件 @
2ea7b6b1
...
@@ -54,7 +54,7 @@ cp -rf include/mace $INCLUDE_DIR
...
@@ -54,7 +54,7 @@ cp -rf include/mace $INCLUDE_DIR
cp
-rf
build/mobilenet/include/mace/public/
*
.h
$INCLUDE_DIR
/mace/public/
cp
-rf
build/mobilenet/include/mace/public/
*
.h
$INCLUDE_DIR
/mace/public/
cp
-rf
build/mobilenet/model
$LIBRARY_DIR
cp
-rf
build/mobilenet/model
$LIBRARY_DIR
bazel build
--config
android
--config
optimization
$BAZEL_LIBMACE_TARGET
--define
neon
=
true
--define
open
mp
=
true
--define
open
cl
=
true
--define
quantize
=
true
--cpu
=
$TARGET_ABI
bazel build
--config
android
--config
optimization
$BAZEL_LIBMACE_TARGET
--define
neon
=
true
--define
opencl
=
true
--define
quantize
=
true
--cpu
=
$TARGET_ABI
cp
-rf
$BAZEL_GEN_LIBMACE_PATH
$LIBMACE_DIR
cp
-rf
$BAZEL_GEN_LIBMACE_PATH
$LIBMACE_DIR
if
[
$MACE_LINK_TYPE
==
"dynamic"
]
;
then
if
[
$MACE_LINK_TYPE
==
"dynamic"
]
;
then
...
...
examples/android/macelibrary/build.gradle
浏览文件 @
2ea7b6b1
...
@@ -16,7 +16,7 @@ android {
...
@@ -16,7 +16,7 @@ android {
externalNativeBuild
{
externalNativeBuild
{
cmake
{
cmake
{
cppFlags
"-std=c++11
-fopenmp
"
cppFlags
"-std=c++11"
abiFilters
"arm64-v8a"
abiFilters
"arm64-v8a"
}
}
}
}
...
...
examples/android/macelibrary/src/main/cpp/image_classify.cc
浏览文件 @
2ea7b6b1
...
@@ -95,7 +95,7 @@ Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateGPUContext(
...
@@ -95,7 +95,7 @@ Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateGPUContext(
JNIEXPORT
jint
JNICALL
JNIEXPORT
jint
JNICALL
Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateEngine
(
Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateEngine
(
JNIEnv
*
env
,
jclass
thisObj
,
jint
omp_
num_threads
,
jint
cpu_affinity_policy
,
JNIEnv
*
env
,
jclass
thisObj
,
jint
num_threads
,
jint
cpu_affinity_policy
,
jint
gpu_perf_hint
,
jint
gpu_priority_hint
,
jint
gpu_perf_hint
,
jint
gpu_priority_hint
,
jstring
model_name_str
,
jstring
device
)
{
jstring
model_name_str
,
jstring
device
)
{
MaceContext
&
mace_context
=
GetMaceContext
();
MaceContext
&
mace_context
=
GetMaceContext
();
...
@@ -110,14 +110,13 @@ Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateEngine(
...
@@ -110,14 +110,13 @@ Java_com_xiaomi_mace_JniMaceUtils_maceMobilenetCreateEngine(
mace
::
MaceStatus
status
;
mace
::
MaceStatus
status
;
mace
::
MaceEngineConfig
config
(
mace_context
.
device_type
);
mace
::
MaceEngineConfig
config
(
mace_context
.
device_type
);
status
=
config
.
SetCPUThreadPolicy
(
status
=
config
.
SetCPUThreadPolicy
(
omp_
num_threads
,
num_threads
,
static_cast
<
mace
::
CPUAffinityPolicy
>
(
cpu_affinity_policy
));
static_cast
<
mace
::
CPUAffinityPolicy
>
(
cpu_affinity_policy
));
if
(
status
!=
mace
::
MaceStatus
::
MACE_SUCCESS
)
{
if
(
status
!=
mace
::
MaceStatus
::
MACE_SUCCESS
)
{
__android_log_print
(
ANDROID_LOG_ERROR
,
__android_log_print
(
ANDROID_LOG_ERROR
,
"image_classify attrs"
,
"image_classify attrs"
,
"openmp result: %s, threads: %d, cpu: %d"
,
"threads: %d, cpu: %d"
,
status
.
information
().
c_str
(),
omp_num_threads
,
num_threads
,
cpu_affinity_policy
);
cpu_affinity_policy
);
}
}
if
(
mace_context
.
device_type
==
mace
::
DeviceType
::
GPU
)
{
if
(
mace_context
.
device_type
==
mace
::
DeviceType
::
GPU
)
{
config
.
SetGPUContext
(
mace_context
.
gpu_context
);
config
.
SetGPUContext
(
mace_context
.
gpu_context
);
...
...
include/mace/public/mace.h
浏览文件 @
2ea7b6b1
...
@@ -316,8 +316,6 @@ class MACE_API MaceEngineConfig {
...
@@ -316,8 +316,6 @@ class MACE_API MaceEngineConfig {
/// (AFFINITY_NONE) cores according to the policy. The threads number will
/// (AFFINITY_NONE) cores according to the policy. The threads number will
/// also be truncated to the corresponding cores number when num_threads_hint
/// also be truncated to the corresponding cores number when num_threads_hint
/// is larger than it.
/// is larger than it.
/// The OpenMP threads will be bind to (via sched_setaffinity) big cores
/// (AFFINITY_BIG_ONLY) and little cores (AFFINITY_LITTLE_ONLY).
///
///
/// \param num_threads_hint it is only a hint.
/// \param num_threads_hint it is only a hint.
/// \param policy one of CPUAffinityPolicy
/// \param policy one of CPUAffinityPolicy
...
...
mace/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -108,14 +108,6 @@ config_setting(
...
@@ -108,14 +108,6 @@ config_setting(
visibility
=
[
"//visibility:public"
],
visibility
=
[
"//visibility:public"
],
)
)
config_setting
(
name
=
"openmp_enabled"
,
define_values
=
{
"openmp"
:
"true"
,
},
visibility
=
[
"//visibility:public"
],
)
config_setting
(
config_setting
(
name
=
"opencl_enabled"
,
name
=
"opencl_enabled"
,
define_values
=
{
define_values
=
{
...
...
mace/core/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -17,7 +17,6 @@ load(
...
@@ -17,7 +17,6 @@ load(
"if_not_apu_enabled"
,
"if_not_apu_enabled"
,
"if_not_hexagon_enabled"
,
"if_not_hexagon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
"if_rpcmem_enabled"
,
"if_rpcmem_enabled"
,
)
)
...
@@ -81,10 +80,7 @@ cc_library(
...
@@ -81,10 +80,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_opencl_enabled
([
"-fopenmp"
,
"-DMACE_ENABLE_OPENMP"
,
])
+
if_opencl_enabled
([
"-DMACE_ENABLE_OPENCL"
,
"-DMACE_ENABLE_OPENCL"
,
])
+
if_quantize_enabled
([
])
+
if_quantize_enabled
([
"-DMACE_ENABLE_QUANTIZE"
,
"-DMACE_ENABLE_QUANTIZE"
,
...
...
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
2ea7b6b1
...
@@ -14,10 +14,6 @@
...
@@ -14,10 +14,6 @@
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#ifdef MACE_ENABLE_OPENMP
#include <omp.h>
#endif
#include <algorithm>
#include <algorithm>
#include <cerrno>
#include <cerrno>
#include <cmath>
#include <cmath>
...
@@ -35,62 +31,7 @@
...
@@ -35,62 +31,7 @@
namespace
mace
{
namespace
mace
{
int
MaceOpenMPThreadCount
=
1
;
MaceStatus
CPURuntime
::
SetThreadsHintAndAffinityPolicy
(
enum
SchedulePolicy
{
SCHED_STATIC
,
SCHED_GUIDED
,
};
namespace
{
MaceStatus
SetOpenMPThreadsAndAffinityCPUs
(
int
omp_num_threads
,
const
std
::
vector
<
size_t
>
&
cpu_ids
,
SchedulePolicy
schedule_policy
)
{
MaceOpenMPThreadCount
=
omp_num_threads
;
SchedSetAffinity
(
cpu_ids
);
#ifdef MACE_ENABLE_OPENMP
VLOG
(
1
)
<<
"Set OpenMP threads number: "
<<
omp_num_threads
<<
", CPU core IDs: "
<<
MakeString
(
cpu_ids
);
if
(
schedule_policy
==
SCHED_GUIDED
)
{
omp_set_schedule
(
omp_sched_guided
,
1
);
}
else
if
(
schedule_policy
==
SCHED_STATIC
)
{
omp_set_schedule
(
omp_sched_static
,
0
);
}
else
{
LOG
(
WARNING
)
<<
"Unknown schedule policy: "
<<
schedule_policy
;
}
omp_set_num_threads
(
omp_num_threads
);
#else
MACE_UNUSED
(
omp_num_threads
);
MACE_UNUSED
(
schedule_policy
);
VLOG
(
2
)
<<
"Set OpenMP threads number failed: OpenMP not enabled."
;
#endif
#ifdef MACE_ENABLE_OPENMP
std
::
vector
<
MaceStatus
>
status
(
omp_num_threads
,
MaceStatus
::
MACE_INVALID_ARGS
);
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
omp_num_threads
;
++
i
)
{
VLOG
(
1
)
<<
"Set affinity for OpenMP thread "
<<
omp_get_thread_num
()
<<
"/"
<<
omp_get_num_threads
();
status
[
i
]
=
SchedSetAffinity
(
cpu_ids
);
}
for
(
int
i
=
0
;
i
<
omp_num_threads
;
++
i
)
{
if
(
status
[
i
]
!=
MaceStatus
::
MACE_SUCCESS
)
return
MaceStatus
::
MACE_INVALID_ARGS
;
}
return
MaceStatus
::
MACE_SUCCESS
;
#else
MaceStatus
status
=
SchedSetAffinity
(
cpu_ids
);
VLOG
(
1
)
<<
"Set affinity without OpenMP: "
<<
MakeString
(
cpu_ids
);
return
status
;
#endif
}
}
// namespace
MaceStatus
CPURuntime
::
SetOpenMPThreadsAndAffinityPolicy
(
int
num_threads_hint
,
int
num_threads_hint
,
CPUAffinityPolicy
policy
,
CPUAffinityPolicy
policy
,
void
*
gemm_context
)
{
void
*
gemm_context
)
{
...
@@ -115,19 +56,8 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
...
@@ -115,19 +56,8 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
#else
#else
MACE_UNUSED
(
gemm_context
);
MACE_UNUSED
(
gemm_context
);
#endif // MACE_ENABLE_QUANTIZE
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENMP
omp_set_num_threads
(
num_threads_hint
);
#else
VLOG
(
2
)
<<
"Set OpenMP threads number failed: OpenMP not enabled."
;
#endif
return
MaceStatus
::
MACE_SUCCESS
;
}
SchedulePolicy
sched_policy
=
SCHED_GUIDED
;
return
MaceStatus
::
MACE_SUCCESS
;
float
first_freq
=
cpu_max_freqs
[
cores_to_use
[
0
]];
float
last_freq
=
cpu_max_freqs
[
cores_to_use
[
cores_to_use
.
size
()
-
1
]];
if
(
std
::
abs
(
first_freq
-
last_freq
)
<
1e-6
)
{
sched_policy
=
SCHED_STATIC
;
}
}
#ifdef MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_QUANTIZE
...
@@ -137,9 +67,10 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
...
@@ -137,9 +67,10 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy(
}
}
#endif // MACE_ENABLE_QUANTIZE
#endif // MACE_ENABLE_QUANTIZE
return
SetOpenMPThreadsAndAffinityCPUs
(
num_threads_hint
,
MaceStatus
status
=
SchedSetAffinity
(
cores_to_use
);
cores_to_use
,
VLOG
(
1
)
<<
"Set affinity : "
<<
MakeString
(
cores_to_use
);
sched_policy
);
return
status
;
}
}
}
// namespace mace
}
// namespace mace
...
...
mace/core/runtime/cpu/cpu_runtime.h
浏览文件 @
2ea7b6b1
...
@@ -29,8 +29,6 @@
...
@@ -29,8 +29,6 @@
namespace
mace
{
namespace
mace
{
extern
int
MaceOpenMPThreadCount
;
class
CPURuntime
{
class
CPURuntime
{
public:
public:
CPURuntime
(
const
int
num_threads
,
CPURuntime
(
const
int
num_threads
,
...
@@ -43,7 +41,7 @@ class CPURuntime {
...
@@ -43,7 +41,7 @@ class CPURuntime {
#ifdef MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_QUANTIZE
MACE_CHECK_NOTNULL
(
GetGemmlowpContext
());
MACE_CHECK_NOTNULL
(
GetGemmlowpContext
());
#endif // MACE_ENABLE_QUANTIZE
#endif // MACE_ENABLE_QUANTIZE
Set
OpenMPThreads
AndAffinityPolicy
(
num_threads_
,
Set
ThreadsHint
AndAffinityPolicy
(
num_threads_
,
policy_
,
policy_
,
gemm_context_
);
gemm_context_
);
}
}
...
@@ -78,8 +76,8 @@ class CPURuntime {
...
@@ -78,8 +76,8 @@ class CPURuntime {
}
}
private:
private:
MaceStatus
Set
OpenMPThreads
AndAffinityPolicy
(
MaceStatus
Set
ThreadsHint
AndAffinityPolicy
(
int
omp_
num_threads_hint
,
int
num_threads_hint
,
CPUAffinityPolicy
policy
,
CPUAffinityPolicy
policy
,
void
*
gemm_context
);
void
*
gemm_context
);
...
...
mace/libmace/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -20,7 +20,6 @@ load(
...
@@ -20,7 +20,6 @@ load(
"if_linux_base"
,
"if_linux_base"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
"if_rpcmem_enabled"
,
"if_rpcmem_enabled"
,
)
)
...
@@ -33,7 +32,7 @@ cc_library(
...
@@ -33,7 +32,7 @@ cc_library(
copts
=
[
copts
=
[
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
]
+
if_
openmp_enabled
([
"-fopenmp"
])
+
if_
neon_enabled
([
]
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
@@ -70,9 +69,7 @@ cc_binary(
...
@@ -70,9 +69,7 @@ cc_binary(
"-Wl,--version-script"
,
"-Wl,--version-script"
,
"$(location //mace/libmace:mace_version_script.lds)"
,
"$(location //mace/libmace:mace_version_script.lds)"
,
],
],
)
+
if_openmp_enabled
([
),
"-fopenmp"
,
]),
linkshared
=
1
,
linkshared
=
1
,
linkstatic
=
0
,
linkstatic
=
0
,
deps
=
[
deps
=
[
...
...
mace/mace.bzl
浏览文件 @
2ea7b6b1
...
@@ -45,7 +45,7 @@ def if_arm_linux_aarch64(a):
...
@@ -45,7 +45,7 @@ def if_arm_linux_aarch64(a):
def
if_arm_linux_armhf
(
a
):
def
if_arm_linux_armhf
(
a
):
return
select
({
return
select
({
"//mace:arm_linux_armhf"
:
a
,
"//mace:arm_linux_armhf"
:
a
,
"//conditions:default"
:
[]
"//conditions:default"
:
[],
})
})
def
if_neon_enabled
(
a
,
default_value
=
[]):
def
if_neon_enabled
(
a
,
default_value
=
[]):
...
@@ -91,12 +91,6 @@ def if_not_apu_enabled(a):
...
@@ -91,12 +91,6 @@ def if_not_apu_enabled(a):
"//conditions:default"
:
a
,
"//conditions:default"
:
a
,
})
})
def
if_openmp_enabled
(
a
):
return
select
({
"//mace:openmp_enabled"
:
a
,
"//conditions:default"
:
[],
})
def
if_opencl_enabled
(
a
,
default_value
=
[]):
def
if_opencl_enabled
(
a
,
default_value
=
[]):
return
select
({
return
select
({
"//mace:opencl_enabled"
:
a
,
"//mace:opencl_enabled"
:
a
,
...
@@ -126,7 +120,7 @@ def mace_version_genrule():
...
@@ -126,7 +120,7 @@ def mace_version_genrule():
name
=
"mace_version_gen"
,
name
=
"mace_version_gen"
,
srcs
=
[
str
(
Label
(
"@local_version_config//:gen/version"
))],
srcs
=
[
str
(
Label
(
"@local_version_config//:gen/version"
))],
outs
=
[
"version/version.cc"
],
outs
=
[
"version/version.cc"
],
cmd
=
"cat $(SRCS) > $@;"
cmd
=
"cat $(SRCS) > $@;"
,
)
)
def
encrypt_opencl_kernel_genrule
():
def
encrypt_opencl_kernel_genrule
():
...
...
mace/ops/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -14,7 +14,6 @@ load(
...
@@ -14,7 +14,6 @@ load(
"if_hexagon_enabled"
,
"if_hexagon_enabled"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
)
)
...
@@ -36,9 +35,7 @@ cc_library(
...
@@ -36,9 +35,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -77,9 +74,7 @@ cc_library(
...
@@ -77,9 +74,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -134,9 +129,7 @@ cc_library(
...
@@ -134,9 +129,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -176,9 +169,7 @@ cc_library(
...
@@ -176,9 +169,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -221,9 +212,7 @@ cc_library(
...
@@ -221,9 +212,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -263,9 +252,7 @@ cc_library(
...
@@ -263,9 +252,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
...
mace/ops/arm/fp16/gemv.h
浏览文件 @
2ea7b6b1
...
@@ -21,8 +21,9 @@
...
@@ -21,8 +21,9 @@
#define MACE_ENABLE_FP16_NEON
#define MACE_ENABLE_FP16_NEON
#endif
#endif
#include "mace/core/ops/op_context.h"
#include "mace/core/types.h"
#include "mace/core/types.h"
#include "mace/utils/thread_pool.h"
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#if defined(MACE_ENABLE_NEON) && defined(__ANDROID__)
#include <arm_neon.h>
#include <arm_neon.h>
...
@@ -38,7 +39,8 @@ namespace ops {
...
@@ -38,7 +39,8 @@ namespace ops {
template
<
typename
INPUT_TYPE_LEFT
,
template
<
typename
INPUT_TYPE_LEFT
,
typename
INPUT_TYPE_RIGHT
,
typename
INPUT_TYPE_RIGHT
,
typename
OUTPUT_TYPE
>
typename
OUTPUT_TYPE
>
void
FP16Gemv
(
const
INPUT_TYPE_LEFT
*
m_ptr
,
void
FP16Gemv
(
OpContext
*
context
,
const
INPUT_TYPE_LEFT
*
m_ptr
,
const
INPUT_TYPE_RIGHT
*
v_ptr
,
const
INPUT_TYPE_RIGHT
*
v_ptr
,
const
index_t
height
,
const
index_t
height
,
const
index_t
width
,
const
index_t
width
,
...
@@ -46,13 +48,17 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
...
@@ -46,13 +48,17 @@ void FP16Gemv(const INPUT_TYPE_LEFT *m_ptr,
#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
#if defined(MACE_ENABLE_FP16_NEON) && defined(__ANDROID__)
template
<
>
template
<
>
void
FP16Gemv
<
float16_t
,
float
,
float
>
(
const
float16_t
*
m_ptr
,
void
FP16Gemv
<
float16_t
,
float
,
float
>
(
OpContext
*
context
,
const
float16_t
*
m_ptr
,
const
float
*
v_ptr
,
const
float
*
v_ptr
,
const
index_t
height
,
const
index_t
height
,
const
index_t
width
,
const
index_t
width
,
float
*
out_ptr
)
{
float
*
out_ptr
)
{
#pragma omp parallel for
utils
::
ThreadPool
&
thread_pool
=
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
context
->
device
()
->
cpu_runtime
()
->
thread_pool
();
thread_pool
.
Compute1D
([
=
](
index_t
start0
,
index_t
end0
,
index_t
step0
)
{
for
(
index_t
h
=
start0
;
h
<
end0
;
h
+=
step0
)
{
const
float16_t
*
m_ptr0
=
m_ptr
+
h
*
width
;
const
float16_t
*
m_ptr0
=
m_ptr
+
h
*
width
;
const
float
*
v_ptr0
=
v_ptr
;
const
float
*
v_ptr0
=
v_ptr
;
float
*
out_ptr0
=
out_ptr
+
h
;
float
*
out_ptr0
=
out_ptr
+
h
;
...
@@ -118,6 +124,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
...
@@ -118,6 +124,7 @@ void FP16Gemv<float16_t, float, float>(const float16_t *m_ptr,
}
}
*
out_ptr0
++
=
sum0
;
*
out_ptr0
++
=
sum0
;
}
}
},
0
,
height
,
1
);
}
}
#endif // MACE_ENABLE_FP16_NEON && __ANDROID__
#endif // MACE_ENABLE_FP16_NEON && __ANDROID__
...
...
mace/ops/depthwise_conv2d.cc
浏览文件 @
2ea7b6b1
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#ifdef MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/ops/arm/q8/quantization_util.h"
#include "mace/ops/arm/q8/quantization_util.h"
// We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it
// We reuse TensorFlow Lite's optimized depthwiseconv_uint8 and parallelized it
// using
OpenMP
for MACE's quantized depthwise_conv2d.
// using
thread pool
for MACE's quantized depthwise_conv2d.
#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#endif // MACE_ENABLE_QUANTIZE
#endif // MACE_ENABLE_QUANTIZE
...
...
mace/ops/matmul.cc
浏览文件 @
2ea7b6b1
...
@@ -567,21 +567,15 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase {
...
@@ -567,21 +567,15 @@ class MatMulOp<CPU, float16_t> : public MatMulOpBase {
B
->
dtype
()
==
DT_FLOAT
)
{
B
->
dtype
()
==
DT_FLOAT
)
{
auto
*
a_ptr_base
=
A
->
data
<
float16_t
>
();
auto
*
a_ptr_base
=
A
->
data
<
float16_t
>
();
auto
*
b_ptr_base
=
B
->
data
<
float
>
();
auto
*
b_ptr_base
=
B
->
data
<
float
>
();
FP16Gemv
(
a_ptr_base
,
FP16Gemv
(
context
,
a_ptr_base
,
b_ptr_base
,
b_ptr_base
,
height
,
K
,
c_ptr_base
);
height
,
K
,
c_ptr_base
);
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
else
if
(
height
==
1
&&
transpose_b_
&&
A
->
dtype
()
==
DT_FLOAT
&&
}
else
if
(
height
==
1
&&
transpose_b_
&&
A
->
dtype
()
==
DT_FLOAT
&&
B
->
dtype
()
==
DT_FLOAT16
)
{
B
->
dtype
()
==
DT_FLOAT16
)
{
auto
*
b_ptr_base
=
B
->
data
<
float16_t
>
();
auto
*
b_ptr_base
=
B
->
data
<
float16_t
>
();
auto
*
a_ptr_base
=
A
->
data
<
float
>
();
auto
*
a_ptr_base
=
A
->
data
<
float
>
();
FP16Gemv
(
b_ptr_base
,
FP16Gemv
(
context
,
b_ptr_base
,
a_ptr_base
,
a_ptr_base
,
width
,
K
,
c_ptr_base
);
width
,
K
,
c_ptr_base
);
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
else
{
}
else
{
LOG
(
INFO
)
<<
"Matmul fp16 gemv args: "
<<
height
<<
" "
<<
width
<<
" "
LOG
(
INFO
)
<<
"Matmul fp16 gemv args: "
<<
height
<<
" "
<<
width
<<
" "
...
...
mace/tools/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -5,7 +5,6 @@ load(
...
@@ -5,7 +5,6 @@ load(
"if_android"
,
"if_android"
,
"if_hexagon_enabled"
,
"if_hexagon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
)
)
licenses
([
"notice"
])
# Apache 2.0
licenses
([
"notice"
])
# Apache 2.0
...
...
mace/tools/mace_run.cc
浏览文件 @
2ea7b6b1
...
@@ -150,7 +150,7 @@ DEFINE_int32(restart_round, 1, "restart round");
...
@@ -150,7 +150,7 @@ DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
DEFINE_int32
(
gpu_perf_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_perf_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
3
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp
threads"
);
DEFINE_int32
(
num_threads
,
-
1
,
"num of
threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
DEFINE_bool
(
benchmark
,
false
,
"enable benchmark op"
);
DEFINE_bool
(
benchmark
,
false
,
"enable benchmark op"
);
...
@@ -170,10 +170,10 @@ bool RunModel(const std::string &model_name,
...
@@ -170,10 +170,10 @@ bool RunModel(const std::string &model_name,
MaceStatus
status
;
MaceStatus
status
;
MaceEngineConfig
config
(
device_type
);
MaceEngineConfig
config
(
device_type
);
status
=
config
.
SetCPUThreadPolicy
(
status
=
config
.
SetCPUThreadPolicy
(
FLAGS_
omp_
num_threads
,
FLAGS_num_threads
,
static_cast
<
CPUAffinityPolicy
>
(
FLAGS_cpu_affinity_policy
));
static_cast
<
CPUAffinityPolicy
>
(
FLAGS_cpu_affinity_policy
));
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
LOG
(
WARNING
)
<<
"Set
openmp or
cpu affinity failed."
;
LOG
(
WARNING
)
<<
"Set cpu affinity failed."
;
}
}
#if defined(MACE_ENABLE_OPENCL) || defined(MACE_ENABLE_HTA)
#if defined(MACE_ENABLE_OPENCL) || defined(MACE_ENABLE_HTA)
std
::
shared_ptr
<
GPUContext
>
gpu_context
;
std
::
shared_ptr
<
GPUContext
>
gpu_context
;
...
@@ -544,7 +544,7 @@ int Main(int argc, char **argv) {
...
@@ -544,7 +544,7 @@ int Main(int argc, char **argv) {
LOG
(
INFO
)
<<
"restart_round: "
<<
FLAGS_restart_round
;
LOG
(
INFO
)
<<
"restart_round: "
<<
FLAGS_restart_round
;
LOG
(
INFO
)
<<
"gpu_perf_hint: "
<<
FLAGS_gpu_perf_hint
;
LOG
(
INFO
)
<<
"gpu_perf_hint: "
<<
FLAGS_gpu_perf_hint
;
LOG
(
INFO
)
<<
"gpu_priority_hint: "
<<
FLAGS_gpu_priority_hint
;
LOG
(
INFO
)
<<
"gpu_priority_hint: "
<<
FLAGS_gpu_priority_hint
;
LOG
(
INFO
)
<<
"
omp_num_threads: "
<<
FLAGS_omp
_num_threads
;
LOG
(
INFO
)
<<
"
num_threads: "
<<
FLAGS
_num_threads
;
LOG
(
INFO
)
<<
"cpu_affinity_policy: "
<<
FLAGS_cpu_affinity_policy
;
LOG
(
INFO
)
<<
"cpu_affinity_policy: "
<<
FLAGS_cpu_affinity_policy
;
auto
limit_opencl_kernel_time
=
getenv
(
"MACE_LIMIT_OPENCL_KERNEL_TIME"
);
auto
limit_opencl_kernel_time
=
getenv
(
"MACE_LIMIT_OPENCL_KERNEL_TIME"
);
if
(
limit_opencl_kernel_time
)
{
if
(
limit_opencl_kernel_time
)
{
...
...
mace/utils/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -12,7 +12,6 @@ load(
...
@@ -12,7 +12,6 @@ load(
"if_android"
,
"if_android"
,
"if_android_armv7"
,
"if_android_armv7"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_openmp_enabled"
,
)
)
cc_library
(
cc_library
(
...
@@ -41,9 +40,7 @@ cc_library(
...
@@ -41,9 +40,7 @@ cc_library(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
...
test/ccbenchmark/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -11,19 +11,18 @@ load(
...
@@ -11,19 +11,18 @@ load(
"if_hexagon_enabled"
,
"if_hexagon_enabled"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
)
)
cc_library
(
cc_library
(
name
=
"benchmark_utils"
,
name
=
"benchmark_utils"
,
testonly
=
1
,
testonly
=
1
,
hdrs
=
glob
([
"mace/benchmark_utils/*.h"
,
]),
srcs
=
glob
([
srcs
=
glob
([
"mace/benchmark_utils/*.cc"
,
"mace/benchmark_utils/*.cc"
,
]),
]),
hdrs
=
glob
([
"mace/benchmark_utils/*.h"
,
]),
copts
=
[
copts
=
[
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
...
@@ -31,9 +30,9 @@ cc_library(
...
@@ -31,9 +30,9 @@ cc_library(
],
],
strip_include_prefix
=
""
,
strip_include_prefix
=
""
,
deps
=
[
deps
=
[
"//external:gflags_nothreads"
,
"//mace/core"
,
"//mace/core"
,
"//test/ccutils"
,
"//test/ccutils"
,
"//external:gflags_nothreads"
,
],
],
)
)
...
...
test/ccbenchmark/mace/benchmark_utils/test_benchmark_main.cc
浏览文件 @
2ea7b6b1
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#include "mace/ops/ops_test_util.h"
#include "mace/ops/ops_test_util.h"
DEFINE_string
(
filter
,
"all"
,
"op benchmark regex filter, eg:.*CONV.*"
);
DEFINE_string
(
filter
,
"all"
,
"op benchmark regex filter, eg:.*CONV.*"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp
threads"
);
DEFINE_int32
(
num_threads
,
-
1
,
"num of
threads"
);
DEFINE_int32
(
cpu_affinity_policy
,
1
,
DEFINE_int32
(
cpu_affinity_policy
,
1
,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"
);
...
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
...
@@ -32,7 +32,7 @@ int main(int argc, char **argv) {
// config runtime
// config runtime
mace
::
ops
::
test
::
OpTestContext
::
Get
(
mace
::
ops
::
test
::
OpTestContext
::
Get
(
FLAGS_
omp_
num_threads
,
FLAGS_num_threads
,
static_cast
<
mace
::
CPUAffinityPolicy
>
(
FLAGS_cpu_affinity_policy
));
static_cast
<
mace
::
CPUAffinityPolicy
>
(
FLAGS_cpu_affinity_policy
));
mace
::
testing
::
Benchmark
::
Run
(
FLAGS_filter
.
c_str
());
mace
::
testing
::
Benchmark
::
Run
(
FLAGS_filter
.
c_str
());
...
...
test/ccunit/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -12,7 +12,6 @@ load(
...
@@ -12,7 +12,6 @@ load(
"if_hta_enabled"
,
"if_hta_enabled"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
)
)
...
@@ -49,9 +48,7 @@ cc_test(
...
@@ -49,9 +48,7 @@ cc_test(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
@@ -65,9 +62,6 @@ cc_test(
...
@@ -65,9 +62,6 @@ cc_test(
])
+
if_hta_enabled
([
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
"-DMACE_ENABLE_HTA"
,
]),
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
"//mace/ops"
,
"//mace/ops"
,
...
...
test/ccunit/mace/libmace/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -12,7 +12,6 @@ load(
...
@@ -12,7 +12,6 @@ load(
"if_hta_enabled"
,
"if_hta_enabled"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
)
)
...
@@ -36,9 +35,7 @@ cc_test(
...
@@ -36,9 +35,7 @@ cc_test(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
@@ -53,9 +50,6 @@ cc_test(
...
@@ -53,9 +50,6 @@ cc_test(
])
+
if_hta_enabled
([
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
"-DMACE_ENABLE_HTA"
,
]),
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
":mace_api_test_header"
,
":mace_api_test_header"
,
...
@@ -73,9 +67,7 @@ cc_test(
...
@@ -73,9 +67,7 @@ cc_test(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
@@ -90,9 +82,6 @@ cc_test(
...
@@ -90,9 +82,6 @@ cc_test(
])
+
if_hta_enabled
([
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
"-DMACE_ENABLE_HTA"
,
]),
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
":mace_api_test_header"
,
":mace_api_test_header"
,
...
@@ -110,9 +99,7 @@ cc_test(
...
@@ -110,9 +99,7 @@ cc_test(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
@@ -127,9 +114,6 @@ cc_test(
...
@@ -127,9 +114,6 @@ cc_test(
])
+
if_hta_enabled
([
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
"-DMACE_ENABLE_HTA"
,
]),
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
"//mace/libmace"
,
"//mace/libmace"
,
...
@@ -146,9 +130,7 @@ cc_test(
...
@@ -146,9 +130,7 @@ cc_test(
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
"-Wno-missing-field-initializers"
,
]
+
if_openmp_enabled
([
]
+
if_neon_enabled
([
"-fopenmp"
,
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon"
,
"-mfpu=neon"
,
...
@@ -163,9 +145,6 @@ cc_test(
...
@@ -163,9 +145,6 @@ cc_test(
])
+
if_hta_enabled
([
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
"-DMACE_ENABLE_HTA"
,
]),
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
]),
linkstatic
=
1
,
linkstatic
=
1
,
deps
=
[
deps
=
[
"//mace/libmace"
,
"//mace/libmace"
,
...
...
test/ccutils/BUILD.bazel
浏览文件 @
2ea7b6b1
...
@@ -11,7 +11,6 @@ load(
...
@@ -11,7 +11,6 @@ load(
"if_hexagon_enabled"
,
"if_hexagon_enabled"
,
"if_neon_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_quantize_enabled"
,
)
)
...
@@ -29,7 +28,7 @@ cc_library(
...
@@ -29,7 +28,7 @@ cc_library(
copts
=
[
copts
=
[
"-Werror"
,
"-Werror"
,
"-Wextra"
,
"-Wextra"
,
]
+
if_
openmp_enabled
([
"-fopenmp"
])
+
if_
neon_enabled
([
]
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
,
"-DMACE_ENABLE_NEON"
,
])
+
if_android_armv7
([
])
+
if_android_armv7
([
"-mfpu=neon-fp16"
,
"-mfpu=neon-fp16"
,
...
...
tools/converter.py
浏览文件 @
2ea7b6b1
...
@@ -117,7 +117,7 @@ DataFormatStrs = [
...
@@ -117,7 +117,7 @@ DataFormatStrs = [
class
DefaultValues
(
object
):
class
DefaultValues
(
object
):
mace_lib_type
=
MACELibType
.
static
mace_lib_type
=
MACELibType
.
static
omp_
num_threads
=
-
1
,
num_threads
=
-
1
,
cpu_affinity_policy
=
1
,
cpu_affinity_policy
=
1
,
gpu_perf_hint
=
3
,
gpu_perf_hint
=
3
,
gpu_priority_hint
=
3
,
gpu_priority_hint
=
3
,
...
@@ -887,7 +887,7 @@ def convert_func(flags):
...
@@ -887,7 +887,7 @@ def convert_func(flags):
################################
################################
# run
# run
################################
################################
def
build_mace_run
(
configs
,
target_abi
,
toolchain
,
enable_openmp
,
def
build_mace_run
(
configs
,
target_abi
,
toolchain
,
address_sanitizer
,
mace_lib_type
,
debug_mode
):
address_sanitizer
,
mace_lib_type
,
debug_mode
):
library_name
=
configs
[
YAMLKeyword
.
library_name
]
library_name
=
configs
[
YAMLKeyword
.
library_name
]
...
@@ -913,7 +913,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
...
@@ -913,7 +913,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
enable_hexagon
=
hexagon_enabled
(
configs
),
enable_hexagon
=
hexagon_enabled
(
configs
),
enable_hta
=
hta_enabled
(
configs
),
enable_hta
=
hta_enabled
(
configs
),
enable_apu
=
apu_enabled
(
configs
),
enable_apu
=
apu_enabled
(
configs
),
enable_openmp
=
enable_openmp
,
enable_opencl
=
opencl_enabled
(
configs
),
enable_opencl
=
opencl_enabled
(
configs
),
enable_quantize
=
quantize_enabled
(
configs
),
enable_quantize
=
quantize_enabled
(
configs
),
enable_bfloat16
=
bfloat16_enabled
(
configs
),
enable_bfloat16
=
bfloat16_enabled
(
configs
),
...
@@ -961,7 +960,6 @@ def run_mace(flags):
...
@@ -961,7 +960,6 @@ def run_mace(flags):
build_mace_run
(
configs
,
build_mace_run
(
configs
,
target_abi
,
target_abi
,
toolchain
,
toolchain
,
flags
.
enable_openmp
,
flags
.
address_sanitizer
,
flags
.
address_sanitizer
,
flags
.
mace_lib_type
,
flags
.
mace_lib_type
,
flags
.
debug_mode
)
flags
.
debug_mode
)
...
@@ -1079,14 +1077,10 @@ def parse_args():
...
@@ -1079,14 +1077,10 @@ def parse_args():
default
=
DefaultValues
.
mace_lib_type
,
default
=
DefaultValues
.
mace_lib_type
,
help
=
"[static | dynamic], Which type MACE library to use."
)
help
=
"[static | dynamic], Which type MACE library to use."
)
run
.
add_argument
(
run
.
add_argument
(
"--enable_openmp"
,
"--num_threads"
,
action
=
"store_true"
,
help
=
"Enable openmp for multiple thread."
)
run
.
add_argument
(
"--omp_num_threads"
,
type
=
int
,
type
=
int
,
default
=
DefaultValues
.
omp_
num_threads
,
default
=
DefaultValues
.
num_threads
,
help
=
"num of
openmp
threads"
)
help
=
"num of threads"
)
run
.
add_argument
(
run
.
add_argument
(
"--cpu_affinity_policy"
,
"--cpu_affinity_policy"
,
type
=
int
,
type
=
int
,
...
...
tools/device.py
浏览文件 @
2ea7b6b1
...
@@ -173,7 +173,7 @@ class DeviceWrapper:
...
@@ -173,7 +173,7 @@ class DeviceWrapper:
opencl_binary_file
,
opencl_binary_file
,
opencl_parameter_file
,
opencl_parameter_file
,
libmace_dynamic_library_path
,
libmace_dynamic_library_path
,
omp_
num_threads
=-
1
,
num_threads
=-
1
,
cpu_affinity_policy
=
1
,
cpu_affinity_policy
=
1
,
gpu_perf_hint
=
3
,
gpu_perf_hint
=
3
,
gpu_priority_hint
=
3
,
gpu_priority_hint
=
3
,
...
@@ -189,11 +189,11 @@ class DeviceWrapper:
...
@@ -189,11 +189,11 @@ class DeviceWrapper:
benchmark
=
False
,
benchmark
=
False
,
):
):
six
.
print_
(
"* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
six
.
print_
(
"* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s,
omp_
num_threads=%s, "
"out_of_range_check=%s, num_threads=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"gpu_priority_hint=%s"
%
"gpu_priority_hint=%s"
%
(
model_tag
,
running_round
,
restart_round
,
str
(
tuning
),
(
model_tag
,
running_round
,
restart_round
,
str
(
tuning
),
str
(
out_of_range_check
),
omp_
num_threads
,
str
(
out_of_range_check
),
num_threads
,
cpu_affinity_policy
,
gpu_perf_hint
,
gpu_priority_hint
))
cpu_affinity_policy
,
gpu_perf_hint
,
gpu_priority_hint
))
mace_model_path
=
""
mace_model_path
=
""
if
model_graph_format
==
ModelFormat
.
file
:
if
model_graph_format
==
ModelFormat
.
file
:
...
@@ -236,7 +236,7 @@ class DeviceWrapper:
...
@@ -236,7 +236,7 @@ class DeviceWrapper:
"--device=%s"
%
device_type
,
"--device=%s"
%
device_type
,
"--round=%s"
%
running_round
,
"--round=%s"
%
running_round
,
"--restart_round=%s"
%
restart_round
,
"--restart_round=%s"
%
restart_round
,
"--
omp_num_threads=%s"
%
omp_
num_threads
,
"--
num_threads=%s"
%
num_threads
,
"--cpu_affinity_policy=%s"
%
cpu_affinity_policy
,
"--cpu_affinity_policy=%s"
%
cpu_affinity_policy
,
"--gpu_perf_hint=%s"
%
gpu_perf_hint
,
"--gpu_perf_hint=%s"
%
gpu_perf_hint
,
"--gpu_priority_hint=%s"
%
gpu_priority_hint
,
"--gpu_priority_hint=%s"
%
gpu_priority_hint
,
...
@@ -336,7 +336,7 @@ class DeviceWrapper:
...
@@ -336,7 +336,7 @@ class DeviceWrapper:
"--device=%s"
%
device_type
,
"--device=%s"
%
device_type
,
"--round=%s"
%
running_round
,
"--round=%s"
%
running_round
,
"--restart_round=%s"
%
restart_round
,
"--restart_round=%s"
%
restart_round
,
"--
omp_num_threads=%s"
%
omp_
num_threads
,
"--
num_threads=%s"
%
num_threads
,
"--cpu_affinity_policy=%s"
%
cpu_affinity_policy
,
"--cpu_affinity_policy=%s"
%
cpu_affinity_policy
,
"--gpu_perf_hint=%s"
%
gpu_perf_hint
,
"--gpu_perf_hint=%s"
%
gpu_perf_hint
,
"--gpu_priority_hint=%s"
%
gpu_priority_hint
,
"--gpu_priority_hint=%s"
%
gpu_priority_hint
,
...
@@ -541,7 +541,7 @@ class DeviceWrapper:
...
@@ -541,7 +541,7 @@ class DeviceWrapper:
out_of_range_check
=
flags
.
gpu_out_of_range_check
,
out_of_range_check
=
flags
.
gpu_out_of_range_check
,
model_graph_format
=
configs
[
model_graph_format
=
configs
[
YAMLKeyword
.
model_graph_format
],
YAMLKeyword
.
model_graph_format
],
omp_num_threads
=
flags
.
omp_
num_threads
,
num_threads
=
flags
.
num_threads
,
cpu_affinity_policy
=
flags
.
cpu_affinity_policy
,
cpu_affinity_policy
=
flags
.
cpu_affinity_policy
,
gpu_perf_hint
=
flags
.
gpu_perf_hint
,
gpu_perf_hint
=
flags
.
gpu_perf_hint
,
gpu_priority_hint
=
flags
.
gpu_priority_hint
,
gpu_priority_hint
=
flags
.
gpu_priority_hint
,
...
...
tools/python/micro/scratch_computer.py
浏览文件 @
2ea7b6b1
...
@@ -31,7 +31,7 @@ class ScratchComputer:
...
@@ -31,7 +31,7 @@ class ScratchComputer:
MaceOp
.
Conv2D
:
self
.
scratch_size_no_need
,
MaceOp
.
Conv2D
:
self
.
scratch_size_no_need
,
MaceOp
.
Squeeze
:
self
.
scratch_size_of_squeeze
,
MaceOp
.
Squeeze
:
self
.
scratch_size_of_squeeze
,
MaceOp
.
Softmax
:
self
.
scratch_size_no_need
,
MaceOp
.
Softmax
:
self
.
scratch_size_no_need
,
MaceOp
.
Eltwise
:
self
.
scratch_size_
no_need
,
MaceOp
.
Eltwise
:
self
.
scratch_size_
eltwise
,
MaceOp
.
Activation
:
self
.
scratch_size_no_need
,
MaceOp
.
Activation
:
self
.
scratch_size_no_need
,
MaceOp
.
StridedSlice
:
self
.
scratch_size_no_need
,
MaceOp
.
StridedSlice
:
self
.
scratch_size_no_need
,
MaceOp
.
Reduce
:
self
.
scratch_size_no_need
,
MaceOp
.
Reduce
:
self
.
scratch_size_no_need
,
...
@@ -126,4 +126,8 @@ class ScratchComputer:
...
@@ -126,4 +126,8 @@ class ScratchComputer:
def
scratch_size_of_squeeze
(
self
,
op_def
):
def
scratch_size_of_squeeze
(
self
,
op_def
):
input0_dims
=
self
.
get_op_input_dims
(
op_def
,
0
)
input0_dims
=
self
.
get_op_input_dims
(
op_def
,
0
)
return
len
(
input0_dims
)
*
self
.
get_data_bytes
(
mace_pb2
.
DT_FLOAT
)
return
len
(
input0_dims
)
*
self
.
get_data_bytes
(
mace_pb2
.
DT_INT32
)
*
2
def
scratch_size_eltwise
(
self
,
op_def
):
input0_dims
=
self
.
get_op_input_dims
(
op_def
,
0
)
return
len
(
input0_dims
)
*
self
.
get_data_bytes
(
mace_pb2
.
DT_INT32
)
*
3
tools/python/transform/onnx_converter.py
浏览文件 @
2ea7b6b1
...
@@ -19,17 +19,19 @@ import six
...
@@ -19,17 +19,19 @@ import six
from
py_proto
import
mace_pb2
from
py_proto
import
mace_pb2
from
transform
import
base_converter
from
transform
import
base_converter
from
transform.base_converter
import
PoolingType
from
transform.base_converter
import
PaddingMode
from
transform.base_converter
import
ActivationType
from
transform.base_converter
import
ActivationType
from
transform.base_converter
import
ConverterUtil
from
transform.base_converter
import
DataFormat
from
transform.base_converter
import
EltwiseType
from
transform.base_converter
import
EltwiseType
from
transform.base_converter
import
ReduceType
from
transform.base_converter
import
FrameworkType
from
transform.base_converter
import
FrameworkType
from
transform.base_converter
import
RoundMode
from
transform.base_converter
import
DataFormat
from
transform.base_converter
import
MaceOp
from
transform.base_converter
import
MaceOp
from
transform.base_converter
import
MaceKeyword
from
transform.base_converter
import
MaceKeyword
from
transform.base_converter
import
ConverterUtil
from
transform.base_converter
import
PoolingType
from
transform.base_converter
import
PaddingMode
from
transform.base_converter
import
PadType
from
transform.base_converter
import
ReduceType
from
transform.base_converter
import
RoundMode
from
utils.util
import
mace_check
from
utils.util
import
mace_check
import
numpy
as
np
import
numpy
as
np
...
@@ -1225,11 +1227,11 @@ class OnnxConverter(base_converter.ConverterInterface):
...
@@ -1225,11 +1227,11 @@ class OnnxConverter(base_converter.ConverterInterface):
padding_type_arg
=
op
.
arg
.
add
()
padding_type_arg
=
op
.
arg
.
add
()
padding_type_arg
.
name
=
MaceKeyword
.
mace_padding_type_str
padding_type_arg
.
name
=
MaceKeyword
.
mace_padding_type_str
if
mode
==
'reflect'
:
if
mode
==
'reflect'
:
padding_type_arg
.
i
=
PadType
.
REFLECT
padding_type_arg
.
i
=
PadType
.
REFLECT
.
value
elif
mode
==
'edge'
:
elif
mode
==
'edge'
:
padding_type_arg
.
i
=
PadType
.
SYMMETRIC
padding_type_arg
.
i
=
PadType
.
SYMMETRIC
.
value
else
:
else
:
padding_type_arg
.
i
=
PadType
.
CONSTANT
padding_type_arg
.
i
=
PadType
.
CONSTANT
.
value
if
'pads'
in
node
.
attrs
:
if
'pads'
in
node
.
attrs
:
paddings_arg
=
op
.
arg
.
add
()
paddings_arg
=
op
.
arg
.
add
()
paddings_arg
.
name
=
MaceKeyword
.
mace_paddings_str
paddings_arg
.
name
=
MaceKeyword
.
mace_paddings_str
...
...
tools/sh_commands.py
浏览文件 @
2ea7b6b1
...
@@ -266,7 +266,6 @@ def bazel_build(target,
...
@@ -266,7 +266,6 @@ def bazel_build(target,
enable_hexagon
=
False
,
enable_hexagon
=
False
,
enable_hta
=
False
,
enable_hta
=
False
,
enable_apu
=
False
,
enable_apu
=
False
,
enable_openmp
=
False
,
enable_neon
=
True
,
enable_neon
=
True
,
enable_opencl
=
True
,
enable_opencl
=
True
,
enable_quantize
=
True
,
enable_quantize
=
True
,
...
@@ -284,8 +283,6 @@ def bazel_build(target,
...
@@ -284,8 +283,6 @@ def bazel_build(target,
"--config"
,
"--config"
,
toolchain
,
toolchain
,
"--define"
,
"--define"
,
"openmp=%s"
%
str
(
enable_openmp
).
lower
(),
"--define"
,
"quantize=%s"
%
str
(
enable_quantize
).
lower
(),
"quantize=%s"
%
str
(
enable_quantize
).
lower
(),
"--define"
,
"--define"
,
"bfloat16=%s"
%
str
(
enable_bfloat16
).
lower
(),
"bfloat16=%s"
%
str
(
enable_bfloat16
).
lower
(),
...
@@ -301,8 +298,6 @@ def bazel_build(target,
...
@@ -301,8 +298,6 @@ def bazel_build(target,
"--define"
,
"--define"
,
"neon=%s"
%
str
(
enable_neon
).
lower
(),
"neon=%s"
%
str
(
enable_neon
).
lower
(),
"--define"
,
"--define"
,
"openmp=%s"
%
str
(
enable_openmp
).
lower
(),
"--define"
,
"opencl=%s"
%
str
(
enable_opencl
).
lower
(),
"opencl=%s"
%
str
(
enable_opencl
).
lower
(),
"--define"
,
"--define"
,
"quantize=%s"
%
str
(
enable_quantize
).
lower
(),
"quantize=%s"
%
str
(
enable_quantize
).
lower
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录