Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
34797c2b
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
34797c2b
编写于
9月 18, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'cpu_runtime' into 'master'
Refactor CPURuntime See merge request !800
上级
46311d82
840aa2d0
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
143 addition
and
151 deletion
+143
-151
mace/core/BUILD
mace/core/BUILD
+4
-1
mace/core/device.cc
mace/core/device.cc
+6
-2
mace/core/device.h
mace/core/device.h
+3
-1
mace/core/runtime/cpu/cpu_runtime.cc
mace/core/runtime/cpu/cpu_runtime.cc
+10
-17
mace/core/runtime/cpu/cpu_runtime.h
mace/core/runtime/cpu/cpu_runtime.h
+34
-12
mace/core/runtime/opencl/gpu_device.cc
mace/core/runtime/opencl/gpu_device.cc
+4
-2
mace/core/runtime/opencl/gpu_device.h
mace/core/runtime/opencl/gpu_device.h
+3
-1
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+2
-5
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+3
-2
mace/kernels/fully_connected.h
mace/kernels/fully_connected.h
+3
-2
mace/kernels/gemmlowp_util.h
mace/kernels/gemmlowp_util.h
+0
-2
mace/kernels/matmul.h
mace/kernels/matmul.h
+3
-2
mace/kernels/matmul_benchmark.cc
mace/kernels/matmul_benchmark.cc
+15
-7
mace/libmace/mace.cc
mace/libmace/mace.cc
+28
-46
mace/ops/ops_test_util.cc
mace/ops/ops_test_util.cc
+15
-4
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+8
-4
mace/public/mace.h
mace/public/mace.h
+2
-41
未找到文件。
mace/core/BUILD
浏览文件 @
34797c2b
...
...
@@ -104,10 +104,13 @@ cc_library(
"-Werror"
,
"-Wextra"
,
"-Wno-missing-field-initializers"
,
],
]
+
if_opencl_enabled
([
"-DMACE_ENABLE_OPENCL"
,
]),
deps
=
[
":core"
,
"//external:gflags_nothreads"
,
"//mace/ops:test"
,
"//mace/utils"
,
],
)
mace/core/device.cc
浏览文件 @
34797c2b
...
...
@@ -16,8 +16,12 @@
namespace
mace
{
CPUDevice
::
CPUDevice
(
const
int
num_threads
)
:
cpu_runtime_
(
new
CPURuntime
(
num_threads
))
{}
CPUDevice
::
CPUDevice
(
const
int
num_threads
,
const
CPUAffinityPolicy
policy
,
const
bool
use_gemmlowp
)
:
cpu_runtime_
(
new
CPURuntime
(
num_threads
,
policy
,
use_gemmlowp
))
{}
CPUDevice
::~
CPUDevice
()
=
default
;
...
...
mace/core/device.h
浏览文件 @
34797c2b
...
...
@@ -41,7 +41,9 @@ class Device {
class
CPUDevice
:
public
Device
{
public:
explicit
CPUDevice
(
const
int
num_threads
);
CPUDevice
(
const
int
num_threads
,
const
CPUAffinityPolicy
policy
,
const
bool
use_gemmlowp
);
virtual
~
CPUDevice
();
#ifdef MACE_ENABLE_OPENCL
...
...
mace/core/runtime/cpu/cpu_runtime.cc
浏览文件 @
34797c2b
...
...
@@ -27,7 +27,6 @@
#include <utility>
#include <vector>
#include "public/gemmlowp.h"
#include "mace/core/macros.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
...
...
@@ -92,13 +91,6 @@ MaceStatus SetThreadAffinity(cpu_set_t mask) {
}
}
}
// namespace
gemmlowp
::
GemmContext
&
GetGemmlowpContext
()
{
static
auto
*
gemm_context
=
new
gemmlowp
::
GemmContext
;
return
*
gemm_context
;
}
MaceStatus
GetCPUBigLittleCoreIDs
(
std
::
vector
<
int
>
*
big_core_ids
,
std
::
vector
<
int
>
*
little_core_ids
)
{
MACE_CHECK_NOTNULL
(
big_core_ids
);
...
...
@@ -174,13 +166,15 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads,
#endif
}
MaceStatus
SetOpenMPThreadsAndAffinityPolicy
(
int
omp_num_threads_hint
,
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
)
{
}
// namespace
MaceStatus
CPURuntime
::
SetOpenMPThreadsAndAffinityPolicy
(
int
omp_num_threads_hint
,
CPUAffinityPolicy
policy
,
gemmlowp
::
GemmContext
*
gemm_context
)
{
if
(
policy
==
CPUAffinityPolicy
::
AFFINITY_NONE
)
{
if
(
use_gemmlowp
)
{
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
gemm_context
.
set_max_num_threads
(
std
::
max
(
0
,
omp_num_threads_hint
));
if
(
gemm_context
)
{
gemm_context
->
set_max_num_threads
(
std
::
max
(
0
,
omp_num_threads_hint
));
}
#ifdef MACE_ENABLE_OPENMP
if
(
omp_num_threads_hint
>
0
)
{
...
...
@@ -211,9 +205,8 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
omp_num_threads_hint
=
use_cpu_ids
.
size
();
}
if
(
use_gemmlowp
)
{
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
gemm_context
.
set_max_num_threads
(
omp_num_threads_hint
);
if
(
gemm_context
)
{
gemm_context
->
set_max_num_threads
(
omp_num_threads_hint
);
}
return
SetOpenMPThreadsAndAffinityCPUs
(
omp_num_threads_hint
,
use_cpu_ids
);
...
...
mace/core/runtime/cpu/cpu_runtime.h
浏览文件 @
34797c2b
...
...
@@ -15,33 +15,55 @@
#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_
#include <memory>
#include <vector>
#include "public/gemmlowp.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
namespace
mace
{
extern
int
MaceOpenMPThreadCount
;
MaceStatus
GetCPUBigLittleCoreIDs
(
std
::
vector
<
int
>
*
big_core_ids
,
std
::
vector
<
int
>
*
little_core_ids
);
MaceStatus
SetOpenMPThreadsAndAffinityCPUs
(
int
omp_num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
);
MaceStatus
SetOpenMPThreadsAndAffinityPolicy
(
int
omp_num_threads_hint
,
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
=
false
);
class
CPURuntime
{
public:
explicit
CPURuntime
(
const
int
num_threads
)
:
num_threads_
(
num_threads
)
{}
CPURuntime
(
const
int
num_threads
,
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
)
:
num_threads_
(
num_threads
),
policy_
(
policy
),
gemm_context_
(
nullptr
)
{
if
(
use_gemmlowp
)
{
MACE_CHECK_NOTNULL
(
GetGemmlowpContext
());
}
SetOpenMPThreadsAndAffinityPolicy
(
num_threads_
,
policy_
,
gemm_context_
.
get
());
}
~
CPURuntime
()
=
default
;
inline
int
num_threads
()
const
{
gemmlowp
::
GemmContext
*
GetGemmlowpContext
()
{
if
(
!
gemm_context_
)
{
gemm_context_
.
reset
(
new
gemmlowp
::
GemmContext
());
}
return
gemm_context_
.
get
();
}
int
num_threads
()
const
{
return
num_threads_
;
}
private:
MaceStatus
SetOpenMPThreadsAndAffinityPolicy
(
int
omp_num_threads_hint
,
CPUAffinityPolicy
policy
,
gemmlowp
::
GemmContext
*
gemm_context
);
int
num_threads_
;
CPUAffinityPolicy
policy_
;
std
::
unique_ptr
<
gemmlowp
::
GemmContext
>
gemm_context_
;
};
}
// namespace mace
...
...
mace/core/runtime/opencl/gpu_device.cc
浏览文件 @
34797c2b
...
...
@@ -21,8 +21,10 @@ GPUDevice::GPUDevice(Tuner<uint32_t> *tuner,
const
GPUPriorityHint
priority
,
const
GPUPerfHint
perf
,
KVStorage
*
opencl_binary_storage
,
const
int
num_threads
)
:
CPUDevice
(
num_threads
),
const
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
,
bool
use_gemmlowp
)
:
CPUDevice
(
num_threads
,
cpu_affinity_policy
,
use_gemmlowp
),
runtime_
(
new
OpenCLRuntime
(
opencl_cache_storage
,
priority
,
perf
,
opencl_binary_storage
,
tuner
)),
allocator_
(
new
OpenCLAllocator
(
runtime_
.
get
()))
{}
...
...
mace/core/runtime/opencl/gpu_device.h
浏览文件 @
34797c2b
...
...
@@ -30,7 +30,9 @@ class GPUDevice : public CPUDevice {
const
GPUPriorityHint
priority
=
GPUPriorityHint
::
PRIORITY_LOW
,
const
GPUPerfHint
perf
=
GPUPerfHint
::
PERF_NORMAL
,
KVStorage
*
opencl_binary_storage
=
nullptr
,
const
int
num_threads
=
-
1
);
const
int
num_threads
=
-
1
,
CPUAffinityPolicy
cpu_affinity_policy
=
AFFINITY_NONE
,
bool
use_gemmlowp
=
false
);
~
GPUDevice
();
OpenCLRuntime
*
opencl_runtime
()
override
;
Allocator
*
allocator
()
override
;
...
...
mace/core/testing/test_benchmark_main.cc
浏览文件 @
34797c2b
...
...
@@ -17,7 +17,7 @@
#include "gflags/gflags.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/
utils/logging
.h"
#include "mace/
ops/ops_test_util
.h"
DEFINE_string
(
filter
,
"all"
,
"op benchmark regex filter, eg:.*CONV.*"
);
DEFINE_int32
(
omp_num_threads
,
-
1
,
"num of openmp threads"
);
...
...
@@ -31,13 +31,10 @@ int main(int argc, char **argv) {
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
// config runtime
mace
::
MaceStatus
status
=
mace
::
SetOpenMPThreadsAndAffinityPolicy
(
mace
::
ops
::
test
::
OpTestContext
::
Get
(
FLAGS_omp_num_threads
,
static_cast
<
mace
::
CPUAffinityPolicy
>
(
FLAGS_cpu_affinity_policy
),
true
);
if
(
status
!=
mace
::
MACE_SUCCESS
)
{
LOG
(
WARNING
)
<<
"Set openmp or cpu affinity failed."
;
}
mace
::
testing
::
Benchmark
::
Run
(
FLAGS_filter
.
c_str
());
return
0
;
...
...
mace/kernels/conv_2d.h
浏览文件 @
34797c2b
...
...
@@ -853,7 +853,8 @@ struct Conv2dFunctor<DeviceType::CPU, uint8_t> : Conv2dFunctorBase {
MACE_CHECK
(
dilations_
[
0
]
==
1
&&
dilations_
[
1
]
==
1
,
"Quantization convolution does not support dilation > 1 yet."
);
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
auto
gemm_context
=
context_
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
...
...
@@ -970,7 +971,7 @@ struct Conv2dFunctor<DeviceType::CPU, uint8_t> : Conv2dFunctorBase {
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
filter_matrix
,
input_matrix
,
&
output_matrix
,
gemm_context
,
filter_matrix
,
input_matrix
,
&
output_matrix
,
-
filter
->
zero_point
(),
-
input
->
zero_point
(),
output_pipeline
);
return
MACE_SUCCESS
;
...
...
mace/kernels/fully_connected.h
浏览文件 @
34797c2b
...
...
@@ -100,7 +100,8 @@ struct FullyConnectedFunctor<DeviceType::CPU, uint8_t>: FullyConnectedBase {
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
auto
gemm_context
=
context_
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
std
::
vector
<
index_t
>
output_shape
=
{
input
->
dim
(
0
),
1
,
1
,
weight
->
dim
(
0
)};
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
output_shape
));
...
...
@@ -142,7 +143,7 @@ struct FullyConnectedFunctor<DeviceType::CPU, uint8_t>: FullyConnectedBase {
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
weight_matrix
,
input_matrix
,
&
output_matrix
,
gemm_context
,
weight_matrix
,
input_matrix
,
&
output_matrix
,
-
weight
->
zero_point
(),
-
input
->
zero_point
(),
output_pipeline
);
return
MACE_SUCCESS
;
...
...
mace/kernels/gemmlowp_util.h
浏览文件 @
34797c2b
...
...
@@ -22,8 +22,6 @@
namespace
mace
{
gemmlowp
::
GemmContext
&
GetGemmlowpContext
();
struct
GemmlowpOutputPipeline
{
typedef
gemmlowp
::
VectorMap
<
const
int32_t
,
gemmlowp
::
VectorShape
::
Col
>
ColVectorMap
;
...
...
mace/kernels/matmul.h
浏览文件 @
34797c2b
...
...
@@ -122,7 +122,8 @@ struct MatMulFunctor<CPU, uint8_t> : OpKernel {
const
index_t
K
,
const
index_t
width
,
Tensor
*
C
)
{
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
auto
gemm_context
=
context_
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guardb
(
B
);
...
...
@@ -149,7 +150,7 @@ struct MatMulFunctor<CPU, uint8_t> : OpKernel {
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
a_matrix
,
b_matrix
,
&
c_matrix
,
-
A
->
zero_point
(),
gemm_context
,
a_matrix
,
b_matrix
,
&
c_matrix
,
-
A
->
zero_point
(),
-
B
->
zero_point
(),
output_pipeline
);
}
}
...
...
mace/kernels/matmul_benchmark.cc
浏览文件 @
34797c2b
...
...
@@ -21,8 +21,8 @@
#include "public/gemmlowp.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/gemm.h"
#include "mace/kernels/gemmlowp_util.h"
#include "mace/kernels/sgemm.h"
#include "mace/ops/ops_test_util.h"
namespace
gemmlowp
{
...
...
@@ -164,18 +164,22 @@ void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) {
const
auto
output_pipeline
=
std
::
make_tuple
(
quantize_down_stage
,
saturating_cast_stage
);
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
auto
gemm_context
=
mace
::
ops
::
test
::
OpTestContext
::
Get
()
->
GetDevice
(
CPU
)
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
uint8_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
}
}
...
...
@@ -195,18 +199,22 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
const
auto
output_pipeline
=
std
::
make_tuple
();
gemmlowp
::
GemmContext
&
gemm_context
=
GetGemmlowpContext
();
auto
gemm_context
=
mace
::
ops
::
test
::
OpTestContext
::
Get
()
->
GetDevice
(
CPU
)
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
int32_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
gemmlowp
::
GemmWithOutputPipeline
<
std
::
uint8_t
,
std
::
int32_t
,
BitDepthParams
>
(
&
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
gemm_context
,
lhs
.
const_map
(),
rhs
.
const_map
(),
&
result
.
map
(),
-
128
,
-
128
,
output_pipeline
);
}
}
...
...
mace/libmace/mace.cc
浏览文件 @
34797c2b
...
...
@@ -177,9 +177,6 @@ class MaceEngineConfig::Impl {
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
);
MaceStatus
SetOpenMPThreadAffinity
(
int
num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
);
inline
DeviceType
device_type
()
const
{
return
device_type_
;
}
...
...
@@ -188,6 +185,14 @@ class MaceEngineConfig::Impl {
return
num_threads_
;
}
inline
CPUAffinityPolicy
cpu_affinity_policy
()
const
{
return
cpu_affinity_policy_
;
}
inline
bool
use_gemmlowp
()
const
{
return
use_gemmlowp_
;
}
inline
std
::
shared_ptr
<
GPUContext
>
gpu_context
()
const
{
return
gpu_context_
;
}
...
...
@@ -203,6 +208,8 @@ class MaceEngineConfig::Impl {
private:
DeviceType
device_type_
;
int
num_threads_
;
CPUAffinityPolicy
cpu_affinity_policy_
;
bool
use_gemmlowp_
;
std
::
shared_ptr
<
GPUContext
>
gpu_context_
;
GPUPriorityHint
gpu_priority_hint_
;
GPUPerfHint
gpu_perf_hint_
;
...
...
@@ -211,6 +218,8 @@ class MaceEngineConfig::Impl {
MaceEngineConfig
::
Impl
::
Impl
(
const
DeviceType
device_type
)
:
device_type_
(
device_type
),
num_threads_
(
-
1
),
cpu_affinity_policy_
(
CPUAffinityPolicy
::
AFFINITY_NONE
),
use_gemmlowp_
(
false
),
gpu_context_
(
new
GPUContext
),
gpu_priority_hint_
(
GPUPriorityHint
::
PRIORITY_LOW
),
gpu_perf_hint_
(
GPUPerfHint
::
PERF_NORMAL
)
{}
...
...
@@ -234,15 +243,9 @@ MaceStatus MaceEngineConfig::Impl::SetCPUThreadPolicy(
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
)
{
num_threads_
=
num_threads
;
return
mace
::
SetOpenMPThreadsAndAffinityPolicy
(
num_threads
,
policy
,
use_gemmlowp
);
}
MaceStatus
MaceEngineConfig
::
Impl
::
SetOpenMPThreadAffinity
(
int
num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
)
{
num_threads_
=
num_threads
;
return
mace
::
SetOpenMPThreadsAndAffinityCPUs
(
num_threads
,
cpu_ids
);
cpu_affinity_policy_
=
policy
;
use_gemmlowp_
=
use_gemmlowp
;
return
MACE_SUCCESS
;
}
...
...
@@ -270,32 +273,6 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy(
return
impl_
->
SetCPUThreadPolicy
(
num_threads_hint
,
policy
,
use_gemmlowp
);
}
MaceStatus
MaceEngineConfig
::
SetOpenMPThreadAffinity
(
int
num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
)
{
return
impl_
->
SetOpenMPThreadAffinity
(
num_threads
,
cpu_ids
);
}
DeviceType
MaceEngineConfig
::
device_type
()
const
{
return
impl_
->
device_type
();
}
int
MaceEngineConfig
::
num_threads
()
const
{
return
impl_
->
num_threads
();
}
std
::
shared_ptr
<
GPUContext
>
MaceEngineConfig
::
gpu_context
()
const
{
return
impl_
->
gpu_context
();
}
GPUPerfHint
MaceEngineConfig
::
gpu_perf_hint
()
const
{
return
impl_
->
gpu_perf_hint
();
}
GPUPriorityHint
MaceEngineConfig
::
gpu_priority_hint
()
const
{
return
impl_
->
gpu_priority_hint
();
}
// Mace Tensor
class
MaceTensor
::
Impl
{
public:
...
...
@@ -389,7 +366,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
:
model_data_
(
nullptr
),
model_data_size_
(
0
),
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
config
.
device_type
()),
device_type_
(
config
.
impl_
->
device_type
()),
device_
(
nullptr
),
ws_
(
new
Workspace
()),
net_
(
nullptr
)
...
...
@@ -399,16 +376,21 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
{
LOG
(
INFO
)
<<
"Creating MaceEngine, MACE version: "
<<
MaceVersion
();
if
(
device_type_
==
DeviceType
::
CPU
||
device_type_
==
DeviceType
::
HEXAGON
)
{
device_
.
reset
(
new
CPUDevice
(
config
.
num_threads
()));
device_
.
reset
(
new
CPUDevice
(
config
.
impl_
->
num_threads
(),
config
.
impl_
->
cpu_affinity_policy
(),
config
.
impl_
->
use_gemmlowp
()));
}
#ifdef MACE_ENABLE_OPENCL
if
(
device_type_
==
DeviceType
::
GPU
)
{
device_
.
reset
(
new
GPUDevice
(
config
.
gpu_context
()
->
opencl_tuner
(),
config
.
gpu_context
()
->
opencl_cache_storage
(),
config
.
gpu_priority_hint
(),
config
.
gpu_perf_hint
(),
config
.
gpu_context
()
->
opencl_binary_storage
(),
config
.
num_threads
()));
device_
.
reset
(
new
GPUDevice
(
config
.
impl_
->
gpu_context
()
->
opencl_tuner
(),
config
.
impl_
->
gpu_context
()
->
opencl_cache_storage
(),
config
.
impl_
->
gpu_priority_hint
(),
config
.
impl_
->
gpu_perf_hint
(),
config
.
impl_
->
gpu_context
()
->
opencl_binary_storage
(),
config
.
impl_
->
num_threads
(),
config
.
impl_
->
cpu_affinity_policy
(),
config
.
impl_
->
use_gemmlowp
()));
}
#endif
}
...
...
mace/ops/ops_test_util.cc
浏览文件 @
34797c2b
...
...
@@ -18,8 +18,12 @@ namespace mace {
namespace
ops
{
namespace
test
{
OpTestContext
*
OpTestContext
::
Get
()
{
static
OpTestContext
instance
;
OpTestContext
*
OpTestContext
::
Get
(
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
,
bool
use_gemmlowp
)
{
static
OpTestContext
instance
(
num_threads
,
cpu_affinity_policy
,
use_gemmlowp
);
return
&
instance
;
}
...
...
@@ -31,8 +35,15 @@ Device *OpTestContext::GetDevice(DeviceType device_type) {
return
device_map_
[
device_type
].
get
();
}
OpTestContext
::
OpTestContext
()
:
gpu_context_
(
new
GPUContext
())
{
device_map_
[
DeviceType
::
CPU
]
=
std
::
unique_ptr
<
Device
>
(
new
CPUDevice
(
-
1
));
OpTestContext
::
OpTestContext
(
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
,
bool
use_gemmlowp
)
:
gpu_context_
(
new
GPUContext
())
{
device_map_
[
DeviceType
::
CPU
]
=
std
::
unique_ptr
<
Device
>
(
new
CPUDevice
(
num_threads
,
cpu_affinity_policy
,
use_gemmlowp
));
device_map_
[
DeviceType
::
GPU
]
=
std
::
unique_ptr
<
Device
>
(
new
GPUDevice
(
gpu_context_
->
opencl_tuner
(),
gpu_context_
->
opencl_cache_storage
(),
...
...
mace/ops/ops_test_util.h
浏览文件 @
34797c2b
...
...
@@ -114,11 +114,17 @@ class OpDefBuilder {
class
OpTestContext
{
public:
static
OpTestContext
*
Get
();
static
OpTestContext
*
Get
(
int
num_threads
=
-
1
,
CPUAffinityPolicy
cpu_affinity_policy
=
AFFINITY_BIG_ONLY
,
bool
use_gemmlowp
=
true
);
std
::
shared_ptr
<
GPUContext
>
gpu_context
()
const
;
Device
*
GetDevice
(
DeviceType
device_type
);
private:
OpTestContext
();
OpTestContext
(
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
,
bool
use_gemmlowp
);
MACE_DISABLE_COPY_AND_ASSIGN
(
OpTestContext
);
std
::
shared_ptr
<
GPUContext
>
gpu_context_
;
...
...
@@ -504,8 +510,6 @@ class OpsTestNet {
class
OpsTestBase
:
public
::
testing
::
Test
{
protected:
virtual
void
SetUp
()
{
SetOpenMPThreadsAndAffinityPolicy
(
-
1
,
CPUAffinityPolicy
::
AFFINITY_BIG_ONLY
);
}
virtual
void
TearDown
()
{
...
...
mace/public/mace.h
浏览文件 @
34797c2b
...
...
@@ -97,21 +97,6 @@ enum MaceStatus {
} \
}
/// \brief Get ARM big.LITTLE configuration.
///
/// This function will detect the max frequencies of all CPU cores, and assume
/// the cores with largest max frequencies as big cores, and all the remaining
/// cores as little. If all cpu core's max frequencies equals, big_core_ids and
/// little_core_ids will both be filled with all cpu core ids.
///
/// \param [out] big_core_ids
/// \param [out] little_core_ids
/// \return If successful, it returns MACE_SUCCESS and error if it can't
/// reliabley detect the frequency of big-LITTLE cores (e.g. MTK).
MACE_API
MaceStatus
GetBigLittleCoreIDs
(
std
::
vector
<
int
>
*
big_core_ids
,
std
::
vector
<
int
>
*
little_core_ids
);
/// \brief GPU context contain the status used for GPU device.
///
/// The life cycle of GPUContext object is the same as MaceEngines use it.
...
...
@@ -170,6 +155,8 @@ class MACE_API GPUContextBuilder {
};
class
MACE_API
MaceEngineConfig
{
friend
class
MaceEngine
;
public:
explicit
MaceEngineConfig
(
const
DeviceType
device_type
);
~
MaceEngineConfig
();
...
...
@@ -219,32 +206,6 @@ class MACE_API MaceEngineConfig {
CPUAffinityPolicy
policy
,
bool
use_gemmlowp
=
false
);
/// \brief Set OpenMP threads number and processor affinity.
///
/// Caution: this function may hurt performance
/// if improper parameters provided.
/// This function may not work well on some chips (e.g. MTK). Setting thread
/// affinity to offline cores may run very slow or unexpectedly.
/// In such cases, please use SetOpenMPThreadPolicy with default policy
/// instead.
///
/// \param num_threads
/// \param cpu_ids
/// \return MACE_SUCCESS for success, other for failed.
MaceStatus
SetOpenMPThreadAffinity
(
int
num_threads
,
const
std
::
vector
<
int
>
&
cpu_ids
);
DeviceType
device_type
()
const
;
int
num_threads
()
const
;
std
::
shared_ptr
<
GPUContext
>
gpu_context
()
const
;
GPUPriorityHint
gpu_priority_hint
()
const
;
GPUPerfHint
gpu_perf_hint
()
const
;
private:
class
Impl
;
std
::
unique_ptr
<
Impl
>
impl_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录