Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
闹闹不闹!
Mace
提交
4f4882fa
Mace
项目概览
闹闹不闹!
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4f4882fa
编写于
4月 17, 2019
作者:
L
luxuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
opt the compile option and related code
N/A Signed-off-by:
N
Luxuhui
<
luxuhui@xiaomi.com
>
上级
77df54f2
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
49 addition
and
18 deletion
+49
-18
mace/core/quantize.cc
mace/core/quantize.cc
+2
-2
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+6
-0
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+1
-1
mace/libmace/mace.cc
mace/libmace/mace.cc
+3
-3
mace/ops/addn.cc
mace/ops/addn.cc
+0
-2
mace/ops/common/transpose.h
mace/ops/common/transpose.h
+1
-1
mace/ops/opencl/image/split.h
mace/ops/opencl/image/split.h
+1
-1
mace/ops/ops_test_util.cc
mace/ops/ops_test_util.cc
+19
-4
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+12
-3
mace/ops/strided_slice.cc
mace/ops/strided_slice.cc
+1
-1
tools/bazel.rc
tools/bazel.rc
+3
-0
未找到文件。
mace/core/quantize.cc
浏览文件 @
4f4882fa
...
...
@@ -77,7 +77,7 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
thread_pool_
->
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
uint8x16_t
vi
=
vld1q_u8
(
input
+
i
*
16
);
float32x4x4_t
vo
=
{
float32x4x4_t
vo
=
{
{
vmulq_f32
(
vscale
,
vcvtq_f32_s32
(
vsubq_s32
(
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
vmovl_u8
(
vget_low_u8
(
vi
))))),
vzero
))),
...
...
@@ -90,7 +90,7 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
vmulq_f32
(
vscale
,
vcvtq_f32_s32
(
vsubq_s32
(
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
vmovl_u8
(
vget_high_u8
(
vi
))))),
vzero
))),
};
}
}
;
vst1q_f32
(
output
+
i
*
16
,
vo
.
val
[
0
]);
vst1q_f32
(
output
+
i
*
16
+
4
,
vo
.
val
[
1
]);
vst1q_f32
(
output
+
i
*
16
+
8
,
vo
.
val
[
2
]);
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
4f4882fa
...
...
@@ -93,6 +93,12 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
]
<<
"] failed because of "
<<
OpenCLErrorToString
(
error
);
// Many users have doubts at CL_INVALID_IMAGE_SIZE, add some tips.
if
(
error
==
CL_INVALID_IMAGE_SIZE
)
{
auto
max_2d_size
=
opencl_runtime_
->
GetMaxImage2DSize
();
LOG
(
WARNING
)
<<
"The allowable OpenCL image size is: "
<<
max_2d_size
[
0
]
<<
"x"
<<
max_2d_size
[
1
];
}
delete
cl_image
;
*
result
=
nullptr
;
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
4f4882fa
...
...
@@ -713,7 +713,7 @@ std::vector<uint64_t> OpenCLRuntime::GetMaxImage2DSize() {
LOG
(
ERROR
)
<<
"error: "
<<
OpenCLErrorToString
(
err
);
return
{};
}
return
{
max_
height
,
max_width
};
return
{
max_
width
,
max_height
};
}
uint64_t
OpenCLRuntime
::
GetKernelMaxWorkGroupSize
(
const
cl
::
Kernel
&
kernel
)
{
...
...
mace/libmace/mace.cc
浏览文件 @
4f4882fa
...
...
@@ -391,12 +391,12 @@ class MaceEngine::Impl {
std
::
unique_ptr
<
Workspace
>
ws_
;
std
::
unique_ptr
<
NetBase
>
net_
;
bool
is_quantized_model_
;
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
std
::
unique_ptr
<
HexagonControlWrapper
>
hexagon_controller_
;
#endif
std
::
map
<
std
::
string
,
mace
::
InputOutputInfo
>
input_info_map_
;
std
::
map
<
std
::
string
,
mace
::
InputOutputInfo
>
output_info_map_
;
std
::
unique_ptr
<
utils
::
ThreadPool
>
thread_pool_
;
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
std
::
unique_ptr
<
HexagonControlWrapper
>
hexagon_controller_
;
#endif
MACE_DISABLE_COPY_AND_ASSIGN
(
Impl
);
};
...
...
mace/ops/addn.cc
浏览文件 @
4f4882fa
...
...
@@ -29,8 +29,6 @@
namespace
mace
{
namespace
ops
{
static
constexpr
int
kCostPerGroup
=
1024
;
template
<
DeviceType
D
,
class
T
>
class
AddNOp
;
...
...
mace/ops/common/transpose.h
浏览文件 @
4f4882fa
...
...
@@ -132,7 +132,7 @@ inline void TransposeNCHWToNHWCC2<float>(utils::ThreadPool *thread_pool,
for
(
w
=
0
;
w
+
3
<
width
;
w
+=
4
)
{
float32x4_t
vi0
=
vld1q_f32
(
input
+
in_offset
);
float32x4_t
vi1
=
vld1q_f32
(
input
+
in_offset
+
image_size
);
float32x4x2_t
vi
=
{
vi0
,
vi1
};
float32x4x2_t
vi
=
{
{
vi0
,
vi1
}
};
vst2q_f32
(
output
+
out_offset
,
vi
);
in_offset
+=
4
;
out_offset
+=
8
;
...
...
mace/ops/opencl/image/split.h
浏览文件 @
4f4882fa
...
...
@@ -133,7 +133,7 @@ MaceStatus SplitKernel<T>::Compute(
}
}
if
(
context
->
future
()
!=
nullptr
)
{
context
->
future
()
->
wait_fn
=
[
runtime
,
call_stats
](
CallStats
*
stats
)
{
context
->
future
()
->
wait_fn
=
[
call_stats
](
CallStats
*
stats
)
{
if
(
stats
!=
nullptr
)
{
stats
->
start_micros
=
call_stats
.
start_micros
;
stats
->
end_micros
=
stats
->
start_micros
+
call_stats
.
end_micros
;
...
...
mace/ops/ops_test_util.cc
浏览文件 @
4f4882fa
...
...
@@ -101,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const {
}
namespace
{
#ifdef MACE_ENABLE_OPENCL
std
::
string
GetStoragePathFromEnv
()
{
char
*
storage_path_str
=
getenv
(
"MACE_INTERNAL_STORAGE_PATH"
);
if
(
storage_path_str
==
nullptr
)
return
""
;
return
storage_path_str
;
}
#endif
}
// namespace
OpTestContext
*
OpTestContext
::
Get
(
int
num_threads
,
...
...
@@ -117,15 +119,21 @@ OpTestContext *OpTestContext::Get(int num_threads,
OpTestContext
::
OpTestContext
(
int
num_threads
,
CPUAffinityPolicy
cpu_affinity_policy
)
#ifdef MACE_ENABLE_OPENCL
:
gpu_context_
(
std
::
make_shared
<
GPUContext
>
(
GetStoragePathFromEnv
())),
opencl_mem_types_
({
MemoryType
::
GPU_IMAGE
}),
thread_pool_
(
make_unique
<
utils
::
ThreadPool
>
(
num_threads
,
cpu_affinity_policy
))
{
#else
:
thread_pool_
(
make_unique
<
utils
::
ThreadPool
>
(
num_threads
,
cpu_affinity_policy
))
{
#endif
thread_pool_
->
Init
();
device_map_
[
DeviceType
::
CPU
]
=
make_unique
<
CPUDevice
>
(
num_threads
,
cpu_affinity_policy
,
thread_pool_
.
get
());
#ifdef MACE_ENABLE_OPENCL
device_map_
[
DeviceType
::
GPU
]
=
make_unique
<
GPUDevice
>
(
gpu_context_
->
opencl_tuner
(),
gpu_context_
->
opencl_cache_storage
(),
...
...
@@ -135,16 +143,18 @@ OpTestContext::OpTestContext(int num_threads,
num_threads
,
cpu_affinity_policy
,
thread_pool_
.
get
());
}
std
::
shared_ptr
<
GPUContext
>
OpTestContext
::
gpu_context
()
const
{
return
gpu_context_
;
#endif
}
Device
*
OpTestContext
::
GetDevice
(
DeviceType
device_type
)
{
return
device_map_
[
device_type
].
get
();
}
#ifdef MACE_ENABLE_OPENCL
std
::
shared_ptr
<
GPUContext
>
OpTestContext
::
gpu_context
()
const
{
return
gpu_context_
;
}
std
::
vector
<
MemoryType
>
OpTestContext
::
opencl_mem_types
()
{
return
opencl_mem_types_
;
}
...
...
@@ -160,6 +170,7 @@ void OpTestContext::SetOCLImageTestFlag() {
void
OpTestContext
::
SetOCLImageAndBufferTestFlag
()
{
opencl_mem_types_
=
{
MemoryType
::
GPU_IMAGE
,
MemoryType
::
GPU_BUFFER
};
}
#endif // MACE_ENABLE_OPENCL
bool
OpsTestNet
::
Setup
(
mace
::
DeviceType
device
)
{
NetDef
net_def
;
...
...
@@ -231,6 +242,7 @@ MaceStatus OpsTestNet::Run() {
MaceStatus
OpsTestNet
::
RunOp
(
mace
::
DeviceType
device
)
{
if
(
device
==
DeviceType
::
GPU
)
{
#ifdef MACE_ENABLE_OPENCL
auto
opencl_mem_types
=
OpTestContext
::
Get
()
->
opencl_mem_types
();
for
(
auto
type
:
opencl_mem_types
)
{
OpTestContext
::
Get
()
->
GetDevice
(
device
)
...
...
@@ -239,6 +251,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
MACE_RETURN_IF_ERROR
(
Run
());
}
return
MaceStatus
::
MACE_SUCCESS
;
#else
return
MaceStatus
::
MACE_UNSUPPORTED
;
#endif // MACE_ENABLE_OPENCL
}
else
{
Setup
(
device
);
return
Run
();
...
...
mace/ops/ops_test_util.h
浏览文件 @
4f4882fa
...
...
@@ -29,8 +29,6 @@
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/device_context.h"
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/ops/ops_registry.h"
...
...
@@ -40,6 +38,11 @@
#include "mace/core/quantize.h"
#include "mace/ops/testing/test_utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#endif
namespace
mace
{
namespace
ops
{
namespace
test
{
...
...
@@ -78,12 +81,14 @@ class OpTestContext {
static
OpTestContext
*
Get
(
int
num_threads
=
-
1
,
CPUAffinityPolicy
cpu_affinity_policy
=
AFFINITY_BIG_ONLY
);
std
::
shared_ptr
<
GPUContext
>
gpu_context
()
const
;
Device
*
GetDevice
(
DeviceType
device_type
);
#ifdef MACE_ENABLE_OPENCL
std
::
shared_ptr
<
GPUContext
>
gpu_context
()
const
;
std
::
vector
<
MemoryType
>
opencl_mem_types
();
void
SetOCLBufferTestFlag
();
void
SetOCLImageTestFlag
();
void
SetOCLImageAndBufferTestFlag
();
#endif
utils
::
ThreadPool
*
thread_pool
()
{
return
thread_pool_
.
get
();
}
...
...
@@ -93,8 +98,10 @@ class OpTestContext {
CPUAffinityPolicy
cpu_affinity_policy
);
MACE_DISABLE_COPY_AND_ASSIGN
(
OpTestContext
);
#ifdef MACE_ENABLE_OPENCL
std
::
shared_ptr
<
GPUContext
>
gpu_context_
;
std
::
vector
<
MemoryType
>
opencl_mem_types_
;
#endif
std
::
map
<
DeviceType
,
std
::
unique_ptr
<
Device
>>
device_map_
;
std
::
unique_ptr
<
utils
::
ThreadPool
>
thread_pool_
;
};
...
...
@@ -424,7 +431,9 @@ class OpsTestBase : public ::testing::Test {
}
virtual
void
TearDown
()
{
#ifdef MACE_ENABLE_OPENCL
OpTestContext
::
Get
()
->
SetOCLImageTestFlag
();
#endif
}
};
...
...
mace/ops/strided_slice.cc
浏览文件 @
4f4882fa
...
...
@@ -199,7 +199,7 @@ class StridedSliceOp : public Operation {
strides_indices_vec
[
d
]
>
0
?
0
:
-
1
,
strides_indices_vec
[
d
]
>
0
?
dim_len
:
dim_len
-
1
};
auto
format_indices
=
[
valid_range
,
d
,
d
im_len
](
index_t
indice
)
{
auto
format_indices
=
[
valid_range
,
dim_len
](
index_t
indice
)
{
index_t
forward
=
indice
<
0
?
indice
+
dim_len
:
indice
;
return
Clamp
(
forward
,
valid_range
[
0
],
valid_range
[
1
]);
};
...
...
tools/bazel.rc
浏览文件 @
4f4882fa
...
...
@@ -19,6 +19,9 @@ build:android --linkopt=-lm
build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android --copt -Wall
build:android --copt -Wno-mismatched-tags
build:android --copt -Wno-missing-braces
# Linux host build, --config linux
build:linux --define linux=true
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录