Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
28954099
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
28954099
编写于
3月 26, 2020
作者:
B
Bin Li
提交者:
luxuhui
4月 21, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support HTA custom API
上级
f4061000
变更
90
展开全部
隐藏空白更改
内联
并排
Showing
90 changed file
with
1882 addition
and
355 deletion
+1882
-355
mace/core/BUILD.bazel
mace/core/BUILD.bazel
+25
-7
mace/core/CMakeLists.txt
mace/core/CMakeLists.txt
+14
-4
mace/core/allocator.cc
mace/core/allocator.cc
+48
-0
mace/core/allocator.h
mace/core/allocator.h
+13
-40
mace/core/quantize.h
mace/core/quantize.h
+3
-0
mace/core/rpcmem.cc
mace/core/rpcmem.cc
+54
-0
mace/core/rpcmem.h
mace/core/rpcmem.h
+36
-0
mace/core/runtime/hexagon/hexagon_allocator.cc
mace/core/runtime/hexagon/hexagon_allocator.cc
+38
-0
mace/core/runtime/hexagon/hexagon_allocator.h
mace/core/runtime/hexagon/hexagon_allocator.h
+38
-0
mace/core/runtime/hexagon/hexagon_control_wrapper.h
mace/core/runtime/hexagon/hexagon_control_wrapper.h
+2
-16
mace/core/runtime/hexagon/hexagon_device.cc
mace/core/runtime/hexagon/hexagon_device.cc
+71
-0
mace/core/runtime/hexagon/hexagon_device.h
mace/core/runtime/hexagon/hexagon_device.h
+22
-26
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
+3
-13
mace/core/runtime/hexagon/hexagon_dsp_wrapper.h
mace/core/runtime/hexagon/hexagon_dsp_wrapper.h
+2
-1
mace/core/runtime/hexagon/hexagon_hta_transformer.cc
mace/core/runtime/hexagon/hexagon_hta_transformer.cc
+644
-0
mace/core/runtime/hexagon/hexagon_hta_transformer.h
mace/core/runtime/hexagon/hexagon_hta_transformer.h
+85
-0
mace/core/runtime/hexagon/hexagon_hta_wrapper.cc
mace/core/runtime/hexagon/hexagon_hta_wrapper.cc
+176
-113
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
+31
-4
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+15
-29
mace/core/runtime/opencl/opencl_allocator.h
mace/core/runtime/opencl/opencl_allocator.h
+8
-3
mace/core/runtime/opencl/opencl_helper.cc
mace/core/runtime/opencl/opencl_helper.cc
+4
-4
mace/core/runtime/opencl/opencl_helper.h
mace/core/runtime/opencl/opencl_helper.h
+3
-5
mace/libmace/BUILD.bazel
mace/libmace/BUILD.bazel
+3
-0
mace/libmace/mace.cc
mace/libmace/mace.cc
+26
-8
mace/ops/opencl/buffer/buffer_transform.h
mace/ops/opencl/buffer/buffer_transform.h
+1
-1
mace/ops/opencl/buffer/buffer_type_transform.cc
mace/ops/opencl/buffer/buffer_type_transform.cc
+1
-1
mace/ops/opencl/buffer/conv_2d.h
mace/ops/opencl/buffer/conv_2d.h
+1
-1
mace/ops/opencl/buffer/conv_2d_1x1.cc
mace/ops/opencl/buffer/conv_2d_1x1.cc
+1
-1
mace/ops/opencl/buffer/conv_2d_general.cc
mace/ops/opencl/buffer/conv_2d_general.cc
+1
-1
mace/ops/opencl/buffer/depthwise_conv2d.h
mace/ops/opencl/buffer/depthwise_conv2d.h
+1
-1
mace/ops/opencl/buffer/pooling.h
mace/ops/opencl/buffer/pooling.h
+1
-1
mace/ops/opencl/buffer/reshape.h
mace/ops/opencl/buffer/reshape.h
+1
-1
mace/ops/opencl/buffer/softmax.h
mace/ops/opencl/buffer/softmax.h
+1
-1
mace/ops/opencl/buffer/utils.cc
mace/ops/opencl/buffer/utils.cc
+1
-1
mace/ops/opencl/cl/buffer_transform.cl
mace/ops/opencl/cl/buffer_transform.cl
+199
-0
mace/ops/opencl/image/activation.h
mace/ops/opencl/image/activation.h
+1
-1
mace/ops/opencl/image/addn.h
mace/ops/opencl/image/addn.h
+1
-1
mace/ops/opencl/image/batch_norm.h
mace/ops/opencl/image/batch_norm.h
+1
-1
mace/ops/opencl/image/batch_to_space.h
mace/ops/opencl/image/batch_to_space.h
+1
-1
mace/ops/opencl/image/bias_add.h
mace/ops/opencl/image/bias_add.h
+1
-1
mace/ops/opencl/image/buffer_to_image.h
mace/ops/opencl/image/buffer_to_image.h
+1
-1
mace/ops/opencl/image/channel_shuffle.h
mace/ops/opencl/image/channel_shuffle.h
+1
-1
mace/ops/opencl/image/concat.h
mace/ops/opencl/image/concat.h
+1
-1
mace/ops/opencl/image/conv_2d.h
mace/ops/opencl/image/conv_2d.h
+1
-1
mace/ops/opencl/image/conv_2d_1x1.cc
mace/ops/opencl/image/conv_2d_1x1.cc
+1
-1
mace/ops/opencl/image/conv_2d_3x3.cc
mace/ops/opencl/image/conv_2d_3x3.cc
+1
-1
mace/ops/opencl/image/conv_2d_general.cc
mace/ops/opencl/image/conv_2d_general.cc
+1
-1
mace/ops/opencl/image/crop.h
mace/ops/opencl/image/crop.h
+1
-1
mace/ops/opencl/image/deconv_2d.h
mace/ops/opencl/image/deconv_2d.h
+1
-1
mace/ops/opencl/image/depth_to_space.h
mace/ops/opencl/image/depth_to_space.h
+1
-1
mace/ops/opencl/image/depthwise_conv2d.h
mace/ops/opencl/image/depthwise_conv2d.h
+1
-1
mace/ops/opencl/image/depthwise_deconv2d.h
mace/ops/opencl/image/depthwise_deconv2d.h
+1
-1
mace/ops/opencl/image/eltwise.h
mace/ops/opencl/image/eltwise.h
+1
-1
mace/ops/opencl/image/fully_connected.h
mace/ops/opencl/image/fully_connected.h
+1
-1
mace/ops/opencl/image/image_to_buffer.h
mace/ops/opencl/image/image_to_buffer.h
+1
-1
mace/ops/opencl/image/lpnorm.h
mace/ops/opencl/image/lpnorm.h
+1
-1
mace/ops/opencl/image/lstm_cell.h
mace/ops/opencl/image/lstm_cell.h
+1
-1
mace/ops/opencl/image/matmul.h
mace/ops/opencl/image/matmul.h
+1
-1
mace/ops/opencl/image/mvnorm.h
mace/ops/opencl/image/mvnorm.h
+1
-1
mace/ops/opencl/image/pad.h
mace/ops/opencl/image/pad.h
+1
-1
mace/ops/opencl/image/pooling.h
mace/ops/opencl/image/pooling.h
+1
-1
mace/ops/opencl/image/reduce.h
mace/ops/opencl/image/reduce.h
+1
-1
mace/ops/opencl/image/reshape.h
mace/ops/opencl/image/reshape.h
+1
-1
mace/ops/opencl/image/resize_bicubic.h
mace/ops/opencl/image/resize_bicubic.h
+1
-1
mace/ops/opencl/image/resize_bilinear.h
mace/ops/opencl/image/resize_bilinear.h
+1
-1
mace/ops/opencl/image/resize_nearest_neighbor.h
mace/ops/opencl/image/resize_nearest_neighbor.h
+1
-1
mace/ops/opencl/image/softmax.h
mace/ops/opencl/image/softmax.h
+1
-1
mace/ops/opencl/image/space_to_batch.h
mace/ops/opencl/image/space_to_batch.h
+1
-1
mace/ops/opencl/image/space_to_depth.h
mace/ops/opencl/image/space_to_depth.h
+1
-1
mace/ops/opencl/image/split.h
mace/ops/opencl/image/split.h
+1
-1
mace/ops/opencl/image/sqrdiff_mean.h
mace/ops/opencl/image/sqrdiff_mean.h
+1
-1
mace/ops/opencl/image/winograd_conv2d.cc
mace/ops/opencl/image/winograd_conv2d.cc
+1
-1
mace/tools/mace_run.cc
mace/tools/mace_run.cc
+2
-2
test/ccunit/BUILD.bazel
test/ccunit/BUILD.bazel
+6
-1
test/ccunit/CMakeLists.txt
test/ccunit/CMakeLists.txt
+4
-0
test/ccunit/mace/core/runtime/hexagon/hta_transform_test.cc
test/ccunit/mace/core/runtime/hexagon/hta_transform_test.cc
+152
-0
test/ccunit/mace/ops/opencl/out_of_range_check_test.cc
test/ccunit/mace/ops/opencl/out_of_range_check_test.cc
+1
-1
test/ccunit/mace/ops/quantize_test.cc
test/ccunit/mace/ops/quantize_test.cc
+1
-0
test/ccutils/mace/ops/testing/test_utils.h
test/ccutils/mace/ops/testing/test_utils.h
+18
-2
third_party/nnlib/arm64-v8a/libhexagon_controller.so
third_party/nnlib/arm64-v8a/libhexagon_controller.so
+0
-0
third_party/nnlib/armeabi-v7a/libhexagon_controller.so
third_party/nnlib/armeabi-v7a/libhexagon_controller.so
+0
-0
third_party/nnlib/hexnn_dsp_controller.h
third_party/nnlib/hexnn_dsp_controller.h
+1
-5
third_party/rpcmem/arm64-v8a/rpcmem.a
third_party/rpcmem/arm64-v8a/rpcmem.a
+0
-0
third_party/rpcmem/armeabi-v7a/rpcmem.a
third_party/rpcmem/armeabi-v7a/rpcmem.a
+0
-0
third_party/rpcmem/rpcmem.h
third_party/rpcmem/rpcmem.h
+22
-11
tools/bazel-build-standalone-lib.sh
tools/bazel-build-standalone-lib.sh
+29
-0
tools/bazel_adb_run.py
tools/bazel_adb_run.py
+6
-0
tools/converter.py
tools/converter.py
+2
-1
tools/python/transform/hexagon_converter.py
tools/python/transform/hexagon_converter.py
+14
-0
tools/sh_commands.py
tools/sh_commands.py
+14
-12
未找到文件。
mace/core/BUILD.bazel
浏览文件 @
28954099
...
...
@@ -28,21 +28,37 @@ cc_library(
"*.cc"
,
"runtime/cpu/*.cc"
,
],
exclude
=
[
"rpcmem.cc"
,
],
)
+
if_opencl_enabled
(
glob
(
[
"runtime/opencl/*.cc"
,
],
))
+
if_hexagon_enabled
([
))
+
if_hexagon_or_hta_enabled
([
"runtime/hexagon/hexagon_allocator.h"
,
"runtime/hexagon/hexagon_allocator.cc"
,
"runtime/hexagon/hexagon_device.cc"
,
])
+
if_hexagon_enabled
([
"runtime/hexagon/hexagon_dsp_wrapper.cc"
,
])
+
if_hta_enabled
([
"runtime/hexagon/hexagon_hta_transformer.h"
,
"runtime/hexagon/hexagon_hta_transformer.cc"
,
"runtime/hexagon/hexagon_hta_wrapper.cc"
,
])
+
if_apu_enabled
(
glob
([
"runtime/apu/*.cc"
,
])),
hdrs
=
glob
([
"*.h"
,
"runtime/cpu/*.h"
,
])
+
if_opencl_enabled
(
glob
([
]))
+
if_rpcmem_enabled
([
"rpcmem.cc"
,
]),
hdrs
=
glob
(
[
"*.h"
,
"runtime/cpu/*.h"
,
],
exclude
=
[
"rpcmem.h"
,
],
)
+
if_opencl_enabled
(
glob
([
"runtime/opencl/*.h"
,
]))
+
if_hexagon_or_hta_enabled
(
glob
([
"runtime/hexagon/hexagon_control_wrapper.h"
,
...
...
@@ -53,7 +69,9 @@ cc_library(
"runtime/hexagon/*hta*.h"
,
]))
+
if_apu_enabled
(
glob
([
"runtime/apu/*.h"
])),
]))
+
if_rpcmem_enabled
([
"rpcmem.h"
,
]),
copts
=
[
"-Werror"
,
"-Wextra"
,
...
...
mace/core/CMakeLists.txt
浏览文件 @
28954099
...
...
@@ -17,11 +17,17 @@ set(CORE_SRCS
runtime/cpu/cpu_runtime.cc
)
if
(
MACE_ENABLE_RPCMEM
)
set
(
CORE_SRCS
${
CORE_SRCS
}
rpcmem.cc
)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
rpcmem
)
endif
(
MACE_ENABLE_RPCMEM
)
if
(
MACE_ENABLE_OPENCL
)
set
(
CORE_SRCS
${
CORE_SRCS
}
runtime/opencl/gpu_device.cc
runtime/opencl/gpu_runtime.cc
runtime/opencl/opencl_allocator.cc
runtime/opencl/opencl_helper.cc
runtime/opencl/opencl_runtime.cc
runtime/opencl/opencl_util.cc
runtime/opencl/opencl_wrapper.cc
...
...
@@ -30,12 +36,20 @@ if(MACE_ENABLE_OPENCL)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
generated_opencl_kernel dl
)
endif
(
MACE_ENABLE_OPENCL
)
if
(
MACE_ENABLE_HEXAGON_DSP OR MACE_ENABLE_HEXAGON_HTA
)
set
(
CORE_SRCS
${
CORE_SRCS
}
runtime/hexagon/hexagon_allocator.cc
runtime/hexagon/hexagon_device.cc
)
endif
(
MACE_ENABLE_HEXAGON_DSP OR MACE_ENABLE_HEXAGON_HTA
)
if
(
MACE_ENABLE_HEXAGON_DSP
)
set
(
CORE_SRCS
${
CORE_SRCS
}
runtime/hexagon/hexagon_dsp_wrapper.cc
)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
hexagon_controller
)
endif
(
MACE_ENABLE_HEXAGON_DSP
)
if
(
MACE_ENABLE_HEXAGON_HTA
)
set
(
CORE_SRCS
${
CORE_SRCS
}
runtime/hexagon/hexagon_hta_transformer.cc
)
set
(
CORE_SRCS
${
CORE_SRCS
}
runtime/hexagon/hexagon_hta_wrapper.cc
)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
hta_hexagon_runtime
)
endif
(
MACE_ENABLE_HEXAGON_HTA
)
...
...
@@ -45,10 +59,6 @@ if(MACE_ENABLE_MTK_APU)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
apu-frontend
)
endif
(
MACE_ENABLE_MTK_APU
)
if
(
MACE_ENABLE_RPCMEM
)
set
(
EXTRA_LINK_LIBS
${
EXTRA_LINK_LIBS
}
rpcmem
)
endif
(
MACE_ENABLE_RPCMEM
)
add_library
(
core STATIC
${
CORE_SRCS
}
)
target_link_libraries
(
core PRIVATE
proto
...
...
mace/core/allocator.cc
浏览文件 @
28954099
...
...
@@ -16,6 +16,54 @@
namespace
mace
{
MaceStatus
Allocator
::
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
,
void
**
result
)
{
MACE_UNUSED
(
image_shape
);
MACE_UNUSED
(
dt
);
MACE_UNUSED
(
result
);
MACE_NOT_IMPLEMENTED
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Allocator
::
DeleteImage
(
void
*
data
)
{
MACE_UNUSED
(
data
);
MACE_NOT_IMPLEMENTED
;
}
void
*
Allocator
::
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
,
bool
finish_cmd_queue
)
{
MACE_UNUSED
(
nbytes
);
MACE_UNUSED
(
finish_cmd_queue
);
return
reinterpret_cast
<
char
*>
(
buffer
)
+
offset
;
}
void
*
Allocator
::
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
,
bool
finish_cmd_queue
)
{
MACE_UNUSED
(
buffer
);
MACE_UNUSED
(
image_shape
);
MACE_UNUSED
(
mapped_image_pitch
);
MACE_UNUSED
(
finish_cmd_queue
);
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
void
Allocator
::
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
{
MACE_UNUSED
(
buffer
);
MACE_UNUSED
(
mapper_ptr
);
}
#ifdef MACE_ENABLE_RPCMEM
Rpcmem
*
Allocator
::
rpcmem
()
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
#endif // MACE_ENABLE_RPCMEM
Allocator
*
GetCPUAllocator
()
{
static
CPUAllocator
allocator
;
return
&
allocator
;
...
...
mace/core/allocator.h
浏览文件 @
28954099
...
...
@@ -27,6 +27,10 @@
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#ifdef MACE_ENABLE_RPCMEM
#include "mace/core/rpcmem.h"
#endif // MACE_ENABLE_RPCMEM
namespace
mace
{
#if defined(__hexagon__)
...
...
@@ -50,19 +54,22 @@ class Allocator {
virtual
MaceStatus
New
(
size_t
nbytes
,
void
**
result
)
=
0
;
virtual
MaceStatus
NewImage
(
const
std
::
vector
<
size_t
>
&
image_shape
,
const
DataType
dt
,
void
**
result
)
=
0
;
void
**
result
);
virtual
void
Delete
(
void
*
data
)
=
0
;
virtual
void
DeleteImage
(
void
*
data
)
=
0
;
virtual
void
DeleteImage
(
void
*
data
);
virtual
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
,
bool
finish_cmd_queue
)
const
=
0
;
bool
finish_cmd_queue
);
virtual
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
,
bool
finish_cmd_queue
)
const
=
0
;
virtual
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
=
0
;
bool
finish_cmd_queue
);
virtual
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
);
virtual
bool
OnHost
()
const
=
0
;
#ifdef MACE_ENABLE_RPCMEM
virtual
Rpcmem
*
rpcmem
();
#endif // MACE_ENABLE_RPCMEM
};
class
CPUAllocator
:
public
Allocator
{
...
...
@@ -84,46 +91,12 @@ class CPUAllocator : public Allocator {
return
MaceStatus
::
MACE_SUCCESS
;
}
MaceStatus
NewImage
(
const
std
::
vector
<
size_t
>
&
shape
,
const
DataType
dt
,
void
**
result
)
override
{
MACE_UNUSED
(
shape
);
MACE_UNUSED
(
dt
);
MACE_UNUSED
(
result
);
LOG
(
FATAL
)
<<
"Allocate CPU image"
;
return
MaceStatus
::
MACE_SUCCESS
;
}
void
Delete
(
void
*
data
)
override
{
MACE_CHECK_NOTNULL
(
data
);
VLOG
(
3
)
<<
"Free CPU buffer"
;
free
(
data
);
}
void
DeleteImage
(
void
*
data
)
override
{
LOG
(
FATAL
)
<<
"Free CPU image"
;
free
(
data
);
};
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
,
bool
finish_cmd_queue
)
const
override
{
MACE_UNUSED
(
nbytes
);
MACE_UNUSED
(
finish_cmd_queue
);
return
reinterpret_cast
<
char
*>
(
buffer
)
+
offset
;
}
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
,
bool
finish_cmd_queue
)
const
override
{
MACE_UNUSED
(
image_shape
);
MACE_UNUSED
(
mapped_image_pitch
);
MACE_UNUSED
(
finish_cmd_queue
);
return
buffer
;
}
void
Unmap
(
void
*
buffer
,
void
*
mapper_ptr
)
const
override
{
MACE_UNUSED
(
buffer
);
MACE_UNUSED
(
mapper_ptr
);
}
bool
OnHost
()
const
override
{
return
true
;
}
};
...
...
mace/core/quantize.h
浏览文件 @
28954099
...
...
@@ -117,9 +117,12 @@ inline void GetOutputMultiplierAndShift(
template
<
typename
F
,
typename
Q
>
class
QuantizeUtil
{
public:
QuantizeUtil
()
=
default
;
explicit
QuantizeUtil
(
utils
::
ThreadPool
*
thread_pool
)
:
thread_pool_
(
thread_pool
)
{}
void
Init
(
utils
::
ThreadPool
*
thread_pool
)
{
thread_pool_
=
thread_pool
;
}
void
QuantizeWithScaleAndZeropoint
(
const
float
*
input
,
const
index_t
size
,
float
scale
,
...
...
mace/core/rpcmem.cc
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/rpcmem.h"
#include "mace/utils/logging.h"
namespace
mace
{
Rpcmem
::
Rpcmem
()
{
rpcmem_init
(
&
rm
);
MACE_CHECK
(
rm
.
flag
==
0
,
"rpcmem_init failed!"
);
}
Rpcmem
::~
Rpcmem
()
{
rpcmem_deinit
(
&
rm
);
}
void
*
Rpcmem
::
New
(
int
heapid
,
uint32_t
flags
,
int
nbytes
)
{
return
rpcmem_alloc
(
&
rm
,
heapid
,
flags
,
nbytes
);
}
void
*
Rpcmem
::
New
(
int
nbytes
)
{
return
New
(
RPCMEM_HEAP_ID_SYSTEM
,
RPCMEM_FLAG_CACHED
,
nbytes
);
}
void
Rpcmem
::
Delete
(
void
*
data
)
{
rpcmem_free
(
&
rm
,
data
);
}
int
Rpcmem
::
ToFd
(
void
*
data
)
{
return
rpcmem_to_fd
(
&
rm
,
data
);
}
int
Rpcmem
::
SyncCacheStart
(
void
*
data
)
{
return
rpcmem_sync_cache
(
&
rm
,
data
,
RPCMEM_SYNC_START
);
}
int
Rpcmem
::
SyncCacheEnd
(
void
*
data
)
{
return
rpcmem_sync_cache
(
&
rm
,
data
,
RPCMEM_SYNC_END
);
}
}
// namespace mace
mace/core/rpcmem.h
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RPCMEM_H_
#define MACE_CORE_RPCMEM_H_
#include "third_party/rpcmem/rpcmem.h"
namespace
mace
{
class
Rpcmem
{
public:
Rpcmem
();
~
Rpcmem
();
void
*
New
(
int
heapid
,
uint32_t
flags
,
int
nbytes
);
void
*
New
(
int
nbytes
);
void
Delete
(
void
*
data
);
int
ToFd
(
void
*
data
);
int
SyncCacheStart
(
void
*
data
);
int
SyncCacheEnd
(
void
*
data
);
private:
rpcmem
rm
;
};
}
// namespace mace
#endif // MACE_CORE_RPCMEM_H_
mace/core/runtime/hexagon/hexagon_allocator.cc
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/runtime/hexagon/hexagon_allocator.h"
namespace
mace
{
MaceStatus
HexagonAllocator
::
New
(
size_t
nbytes
,
void
**
result
)
{
*
result
=
rpcmem_
.
New
(
nbytes
);
MACE_CHECK_NOTNULL
(
*
result
);
memset
(
*
result
,
0
,
nbytes
);
return
MaceStatus
::
MACE_SUCCESS
;
}
void
HexagonAllocator
::
Delete
(
void
*
data
)
{
rpcmem_
.
Delete
(
data
);
}
bool
HexagonAllocator
::
OnHost
()
const
{
return
true
;
}
Rpcmem
*
HexagonAllocator
::
rpcmem
()
{
return
&
rpcmem_
;
}
}
// namespace mace
mace/core/runtime/hexagon/hexagon_allocator.h
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_ALLOCATOR_H_
#include "mace/core/allocator.h"
namespace
mace
{
class
HexagonAllocator
:
public
Allocator
{
public:
MaceStatus
New
(
size_t
nbytes
,
void
**
result
)
override
;
void
Delete
(
void
*
buffer
)
override
;
bool
OnHost
()
const
override
;
Rpcmem
*
rpcmem
()
override
;
private:
Rpcmem
rpcmem_
;
};
}
// namespace mace
#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_ALLOCATOR_H_
mace/core/runtime/hexagon/hexagon_control_wrapper.h
浏览文件 @
28954099
...
...
@@ -25,27 +25,15 @@
#include "mace/public/mace.h"
namespace
mace
{
struct
InOutInfo
{
InOutInfo
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
DataType
data_type
,
const
float
scale
,
const
int32_t
zero_point
,
std
::
unique_ptr
<
Tensor
>
tensor_u8
)
:
name
(
name
),
shape
(
shape
),
data_type
(
data_type
),
scale
(
scale
),
zero_point
(
zero_point
),
tensor_u8
(
std
::
move
(
tensor_u8
))
{}
const
DataType
data_type
)
:
name
(
name
),
shape
(
shape
),
data_type
(
data_type
)
{}
std
::
string
name
;
std
::
vector
<
index_t
>
shape
;
DataType
data_type
;
float
scale
;
int32_t
zero_point
;
std
::
unique_ptr
<
Tensor
>
tensor_u8
;
};
class
HexagonControlWrapper
{
...
...
@@ -79,8 +67,6 @@ class HexagonControlWrapper {
int
nn_id_
;
std
::
vector
<
InOutInfo
>
input_info_
;
std
::
vector
<
InOutInfo
>
output_info_
;
int
num_inputs_
;
int
num_outputs_
;
...
...
mace/core/runtime/hexagon/hexagon_device.cc
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/runtime/hexagon/hexagon_device.h"
namespace
mace
{
HexagonDevice
::
HexagonDevice
(
DeviceType
device_type
,
utils
::
ThreadPool
*
thread_pool
#ifdef MACE_ENABLE_OPENCL
,
std
::
unique_ptr
<
GPUDevice
>
gpu_device
#endif // MACE_ENABLE_OPENCL
)
:
CPUDevice
(
0
,
AFFINITY_NONE
,
thread_pool
),
allocator_
(
make_unique
<
HexagonAllocator
>
()),
device_type_
(
device_type
)
#ifdef MACE_ENABLE_OPENCL
,
gpu_device_
(
std
::
move
(
gpu_device
))
#endif // MACE_ENABLE_OPENCL
{}
#ifdef MACE_ENABLE_OPENCL
GPURuntime
*
HexagonDevice
::
gpu_runtime
()
{
return
gpu_device_
->
gpu_runtime
();
}
#endif // MACE_ENABLE_OPENCL
Allocator
*
HexagonDevice
::
allocator
()
{
#ifdef MACE_ENABLE_OPENCL
return
gpu_device_
->
allocator
();
#else
return
allocator_
.
get
();
#endif // MACE_ENABLE_OPENCL
}
DeviceType
HexagonDevice
::
device_type
()
const
{
return
device_type_
;
}
std
::
unique_ptr
<
HexagonControlWrapper
>
CreateHexagonControlWrapper
(
Device
*
device
)
{
std
::
unique_ptr
<
HexagonControlWrapper
>
hexagon_controller
;
auto
device_type
=
device
->
device_type
();
switch
(
device_type
)
{
#ifdef MACE_ENABLE_HEXAGON
case
HEXAGON
:
hexagon_controller
=
make_unique
<
HexagonDSPWrapper
>
();
break
;
#endif
#ifdef MACE_ENABLE_HTA
case
HTA
:
hexagon_controller
=
make_unique
<
HexagonHTAWrapper
>
(
device
);
break
;
#endif
default:
LOG
(
FATAL
)
<<
"Not supported Hexagon device type: "
<<
device_type
;
}
return
hexagon_controller
;
}
}
// namespace mace
mace/core/runtime/hexagon/hexagon_device.h
浏览文件 @
28954099
...
...
@@ -19,6 +19,7 @@
#include <utility>
#include "mace/core/device.h"
#include "mace/core/runtime/hexagon/hexagon_allocator.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#ifdef MACE_ENABLE_HEXAGON
#include "mace/core/runtime/hexagon/hexagon_dsp_wrapper.h"
...
...
@@ -26,44 +27,39 @@
#ifdef MACE_ENABLE_HTA
#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h"
#endif
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/gpu_runtime.h"
#endif
namespace
mace
{
class
HexagonDevice
:
public
CPUDevice
{
public:
explicit
HexagonDevice
(
DeviceType
device_type
,
utils
::
ThreadPool
*
thread_pool
)
:
CPUDevice
(
0
,
AFFINITY_NONE
,
thread_pool
),
device_type_
(
device_type
)
{}
HexagonDevice
(
DeviceType
device_type
,
#ifdef MACE_ENABLE_OPENCL
utils
::
ThreadPool
*
thread_pool
,
std
::
unique_ptr
<
GPUDevice
>
gpu_device
);
#else
utils
::
ThreadPool
*
thread_pool
);
#endif // MACE_ENABLE_OPENCL
DeviceType
device_type
()
const
override
{
return
device_type_
;
};
#ifdef MACE_ENABLE_OPENCL
GPURuntime
*
gpu_runtime
()
override
;
#endif // MACE_ENABLE_OPENCL
Allocator
*
allocator
()
override
;
DeviceType
device_type
()
const
override
;
private:
std
::
unique_ptr
<
HexagonAllocator
>
allocator_
;
DeviceType
device_type_
;
#ifdef MACE_ENABLE_OPENCL
std
::
unique_ptr
<
GPUDevice
>
gpu_device_
;
#endif // MACE_ENABLE_OPENCL
};
std
::
unique_ptr
<
HexagonControlWrapper
>
CreateHexagonControlWrapper
(
Device
*
device
)
{
std
::
unique_ptr
<
HexagonControlWrapper
>
hexagon_controller
;
auto
device_type
=
device
->
device_type
();
switch
(
device_type
)
{
#ifdef MACE_ENABLE_HEXAGON
case
HEXAGON
:
hexagon_controller
=
make_unique
<
HexagonDSPWrapper
>
();
break
;
#endif
#ifdef MACE_ENABLE_HTA
case
HTA
:
hexagon_controller
=
make_unique
<
HexagonHTAWrapper
>
(
device
);
break
;
#endif
default:
LOG
(
FATAL
)
<<
"Not supported Hexagon device type: "
<<
device_type
;
}
return
hexagon_controller
;
}
Device
*
device
);
}
// namespace mace
#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
浏览文件 @
28954099
...
...
@@ -107,8 +107,6 @@ hexagon_nn_corner_type TransformCornerType(HexagonNNCornerType corner) {
}
// namespace
HexagonDSPWrapper
::
HexagonDSPWrapper
()
{
hexnn_controller_init
();
std
::
string
env_log_execute_time_str
;
GetEnv
(
"MACE_DSP_LOG_EXECUTE_TIME"
,
&
env_log_execute_time_str
);
if
(
env_log_execute_time_str
.
empty
())
{
...
...
@@ -119,9 +117,7 @@ HexagonDSPWrapper::HexagonDSPWrapper() {
}
}
HexagonDSPWrapper
::~
HexagonDSPWrapper
()
{
hexnn_controller_deinit
();
}
HexagonDSPWrapper
::~
HexagonDSPWrapper
()
{}
int
HexagonDSPWrapper
::
GetVersion
()
{
int
version
;
...
...
@@ -258,10 +254,7 @@ bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def,
}
input_info_
.
emplace_back
(
input_info
.
name
(),
input_shape
,
input_info
.
data_type
(),
input_info
.
scale
(),
input_info
.
zero_point
(),
make_unique
<
Tensor
>
());
input_info
.
data_type
());
}
// output info
...
...
@@ -275,10 +268,7 @@ bool HexagonDSPWrapper::SetupGraph(const NetDef &net_def,
}
output_info_
.
emplace_back
(
output_info
.
name
(),
output_shape
,
output_info
.
data_type
(),
output_info
.
scale
(),
output_info
.
zero_point
(),
make_unique
<
Tensor
>
());
output_info
.
data_type
());
VLOG
(
1
)
<<
"OutputInfo: "
<<
"
\n\t
shape: "
<<
output_shape
[
0
]
<<
" "
<<
output_shape
[
1
]
<<
" "
<<
output_shape
[
2
]
<<
" "
<<
output_shape
[
3
]
...
...
mace/core/runtime/hexagon/hexagon_dsp_wrapper.h
浏览文件 @
28954099
...
...
@@ -56,7 +56,8 @@ class HexagonDSPWrapper : public HexagonControlWrapper {
uint64_t
GetLastExecuteCycles
();
bool
log_execute_time_
;
std
::
vector
<
InOutInfo
>
input_info_
;
std
::
vector
<
InOutInfo
>
output_info_
;
MACE_DISABLE_COPY_AND_ASSIGN
(
HexagonDSPWrapper
);
};
}
// namespace mace
...
...
mace/core/runtime/hexagon/hexagon_hta_transformer.cc
0 → 100644
浏览文件 @
28954099
此差异已折叠。
点击以展开。
mace/core/runtime/hexagon/hexagon_hta_transformer.h
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_TRANSFORMER_H_
#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_TRANSFORMER_H_
#include <memory>
#include <vector>
#include "mace/core/device.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/utils/math.h"
#include "mace/utils/thread_pool.h"
#include "third_party/hta/hta_hexagon_api.h"
namespace
mace
{
class
BaseTransformer
{
public:
BaseTransformer
()
=
default
;
virtual
~
BaseTransformer
()
=
default
;
virtual
void
Init
(
Device
*
device
)
{
device_
=
device
;
}
virtual
MaceStatus
Compute
(
const
Tensor
*
input
,
Tensor
*
output
)
=
0
;
protected:
Device
*
device_
;
};
class
HexagonHTATranformerBase
{
public:
HexagonHTATranformerBase
()
=
default
;
virtual
~
HexagonHTATranformerBase
()
=
default
;
virtual
void
Init
(
Device
*
device
)
=
0
;
virtual
MaceStatus
SetInputTransformer
(
const
hexagon_hta_hw_layout
format
)
=
0
;
virtual
MaceStatus
SetOutputTransformer
(
const
hexagon_hta_hw_layout
format
)
=
0
;
virtual
MaceStatus
Quantize
(
const
Tensor
*
input
,
Tensor
*
output
)
=
0
;
virtual
MaceStatus
Dequantize
(
const
Tensor
*
input
,
Tensor
*
output
)
=
0
;
virtual
MaceStatus
TransformInput
(
const
Tensor
*
input
,
Tensor
*
output
,
int
index
)
=
0
;
virtual
MaceStatus
TransformOutput
(
const
Tensor
*
input
,
Tensor
*
output
,
int
index
)
=
0
;
};
template
<
DeviceType
D
>
class
HexagonHTATranformer
:
public
HexagonHTATranformerBase
{
public:
void
Init
(
Device
*
device
)
override
;
MaceStatus
SetInputTransformer
(
const
hexagon_hta_hw_layout
format
)
override
;
MaceStatus
SetOutputTransformer
(
const
hexagon_hta_hw_layout
format
)
override
;
MaceStatus
Quantize
(
const
Tensor
*
input
,
Tensor
*
output
)
override
;
MaceStatus
Dequantize
(
const
Tensor
*
input
,
Tensor
*
output
)
override
;
MaceStatus
TransformInput
(
const
Tensor
*
input
,
Tensor
*
output
,
int
index
)
override
;
MaceStatus
TransformOutput
(
const
Tensor
*
input
,
Tensor
*
output
,
int
index
)
override
;
private:
Device
*
device_
;
std
::
unique_ptr
<
BaseTransformer
>
quantizer_
;
std
::
unique_ptr
<
BaseTransformer
>
dequantizer_
;
std
::
vector
<
std
::
unique_ptr
<
BaseTransformer
>>
input_transformers_
;
std
::
vector
<
std
::
unique_ptr
<
BaseTransformer
>>
output_transformers_
;
};
}
// namespace mace
#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_TRANSFORMER_H_
mace/core/runtime/hexagon/hexagon_hta_wrapper.cc
浏览文件 @
28954099
...
...
@@ -13,7 +13,7 @@
// limitations under the License.
#include "mace/core/runtime/hexagon/hexagon_hta_wrapper.h"
#include <sys/types.h>
#include <algorithm>
#include <iomanip>
#include <map>
...
...
@@ -24,48 +24,59 @@
#include <utility>
#include "mace/core/runtime/hexagon/hexagon_hta_ops.h"
#include "mace/core/runtime/hexagon/hexagon_hta_transformer.h"
#include "mace/core/types.h"
#include "mace/utils/memory.h"
#include "mace/core/quantize.h"
#include "third_party/hta/hta_hexagon_api.h"
namespace
mace
{
namespace
{
struct
InputOutputMetadata
{
void
Init
(
float
min_val
,
float
max_val
,
int
needs_quantization
)
{
this
->
min_val
=
min_val
;
this
->
max_val
=
max_val
;
this
->
needs_quantization
=
needs_quantization
;
int
GetHTAEnv
(
const
std
::
string
&
name
,
int
default_value
)
{
int
value
=
default_value
;
std
::
string
env_str
;
MaceStatus
status
=
GetEnv
(
name
.
c_str
(),
&
env_str
);
if
(
status
==
MaceStatus
::
MACE_SUCCESS
&&
!
env_str
.
empty
())
{
value
=
std
::
atoi
(
env_str
.
c_str
());
}
float
min_val
;
float
max_val
;
int
needs_quantization
;
};
template
<
typename
T
>
void
AddInputMetadata
(
const
T
&
data
,
hexagon_hta_nn_tensordef
*
tensor
)
{
tensor
->
batches
=
1
;
tensor
->
height
=
1
;
tensor
->
width
=
1
;
tensor
->
depth
=
1
;
tensor
->
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
&
data
));
tensor
->
dataLen
=
sizeof
(
data
);
tensor
->
data_valid_len
=
sizeof
(
data
);
tensor
->
unused
=
0
;
return
value
;
}
template
<
typename
T
>
void
AddOutputMetadata
(
const
T
&
data
,
hexagon_hta_nn_tensordef
*
tensor
)
{
tensor
->
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
&
data
));
tensor
->
dataLen
=
sizeof
(
data
);
// Print the API logs to standard output.
void
HtaApiLog
(
hexagon_hta_nn_nn_id
id
,
const
char
*
api_op
,
const
char
*
const
format
,
...)
{
va_list
arg
;
va_start
(
arg
,
format
);
if
(
api_op
!=
NULL
)
{
printf
(
"Graph ID: %d
\t
"
,
id
);
}
vfprintf
(
stdout
,
format
,
arg
);
va_end
(
arg
);
}
// Print the performance stats to standard output.
void
HtaPerformanceLog
(
int
log_level
,
uint32_t
network_handle
,
uint32_t
thread_id
,
const
char
*
const
format
,
...)
{
va_list
arg
;
va_start
(
arg
,
format
);
printf
(
"Log Level: %d, Network Handle: %d, Thread ID: %d - "
,
log_level
,
network_handle
,
thread_id
);
vfprintf
(
stdout
,
format
,
arg
);
va_end
(
arg
);
}
}
// namespace
HexagonHTAWrapper
::
HexagonHTAWrapper
(
Device
*
device
)
:
quantize_util_
(
&
device
->
cpu_runtime
()
->
thread_pool
())
{
:
allocator_
(
device
->
allocator
()),
#ifdef MACE_ENABLE_OPENCL
transformer_
(
make_unique
<
HexagonHTATranformer
<
GPU
>>
())
{
#else
transformer_
(
make_unique
<
HexagonHTATranformer
<
CPU
>>
())
{
#endif
transformer_
->
Init
(
device
);
}
int
HexagonHTAWrapper
::
GetVersion
()
{
...
...
@@ -84,6 +95,40 @@ bool HexagonHTAWrapper::Init() {
LOG
(
INFO
)
<<
"Hexagon init"
;
MACE_CHECK
(
hexagon_hta_nn_init
(
&
nn_id_
)
==
0
,
"hexagon_nn_init failed"
);
ResetPerfInfo
();
int
ret
;
int
power_level
=
GetHTAEnv
(
"MACE_HTA_POWER_LEVEL"
,
-
1
);
if
(
power_level
!=
-
1
)
{
ret
=
hexagon_hta_nn_set_config_params
(
nn_id_
,
HTA_NN_CONFIG_POWER_LEVEL
,
&
power_level
,
sizeof
(
power_level
));
LOG
(
INFO
)
<<
"HTA_NN_CONFIG_POWER_LEVEL: "
<<
power_level
<<
" returns: "
<<
ret
;
}
int
is_compress
=
GetHTAEnv
(
"MACE_HTA_BANDWIDTH_COMPRESSION"
,
1
);
if
(
is_compress
)
{
ret
=
hexagon_hta_nn_set_config_params
(
nn_id_
,
HTA_NN_CONFIG_BANDWIDTH_COMPRESSION
,
&
is_compress
,
sizeof
(
is_compress
));
LOG
(
INFO
)
<<
"HTA_NN_CONFIG_BANDWIDTH_COMPRESSION: "
<<
is_compress
<<
" returns: "
<<
ret
;
}
if
(
VLOG_IS_ON
(
2
))
{
ret
=
hexagon_hta_nn_set_config_params
(
nn_id_
,
HTA_NN_CONFIG_PERFORMANCE_LOG
,
reinterpret_cast
<
void
*>
(
&
HtaPerformanceLog
),
sizeof
(
&
HtaPerformanceLog
));
MACE_CHECK
(
ret
==
0
,
"HTA_NN_CONFIG_PERFORMANCE_LOG returns: "
,
ret
);
}
if
(
VLOG_IS_ON
(
3
))
{
ret
=
hexagon_hta_nn_set_config_params
(
nn_id_
,
HTA_NN_CONFIG_API_LOG
,
reinterpret_cast
<
void
*>
(
&
HtaApiLog
),
sizeof
(
&
HtaApiLog
));
MACE_CHECK
(
ret
==
0
,
"HTA_NN_CONFIG_API_LOG returns: "
,
ret
);
}
return
true
;
}
...
...
@@ -172,52 +217,107 @@ bool HexagonHTAWrapper::SetupGraph(const NetDef &net_def,
outputs
.
size
());
}
int64_t
t1
=
NowMicros
();
MACE_CHECK
(
hexagon_hta_nn_prepare
(
nn_id_
)
==
0
,
"hexagon_nn_prepare failed"
);
int64_t
t2
=
NowMicros
();
VLOG
(
1
)
<<
"Setup time: "
<<
t1
-
t0
<<
" "
<<
t2
-
t1
;
// input info
num_inputs_
=
net_def
.
input_info_size
();
input_info_
.
reserve
(
num_inputs_
);
for
(
const
InputOutputInfo
&
input_info
:
net_def
.
input_info
())
{
input_tensordef_
.
resize
(
num_inputs_
);
for
(
int
index
=
0
;
index
<
num_inputs_
;
++
index
)
{
auto
input_info
=
net_def
.
input_info
(
index
);
std
::
vector
<
index_t
>
input_shape
(
input_info
.
dims
().
begin
(),
input_info
.
dims
().
end
());
while
(
input_shape
.
size
()
<
4
)
{
input_shape
.
insert
(
input_shape
.
begin
(),
1
);
}
input_info_
.
emplace_back
(
input_info
.
name
(),
input_shape
,
input_info
.
data_type
(),
input_info
.
scale
(),
input_info
.
zero_point
(),
make_unique
<
Tensor
>
());
auto
quantized_tensor
=
make_unique
<
Tensor
>
(
allocator_
,
DT_UINT8
);
auto
hta_tensor
=
make_unique
<
Tensor
>
(
allocator_
,
DT_UINT8
);
hexagon_hta_nn_hw_tensordef
&
input_tensordef
=
input_tensordef_
[
index
];
memset
(
&
input_tensordef
,
0
,
sizeof
(
input_tensordef
));
MACE_CHECK
(
hexagon_hta_nn_get_memory_layout
(
nn_id_
,
0
,
index
,
&
input_tensordef
)
==
0
);
input_tensordef
.
dataLen
=
input_tensordef
.
batchStride
;
VLOG
(
1
)
<<
input_tensordef
.
format
<<
" "
<<
input_tensordef
.
elementSize
<<
" "
<<
input_tensordef
.
numDims
<<
" "
<<
input_tensordef
.
batchStride
;
for
(
uint32_t
i
=
0
;
i
<
input_tensordef
.
numDims
;
++
i
)
{
VLOG
(
1
)
<<
input_tensordef
.
dim
[
i
].
length
<<
" "
<<
input_tensordef
.
dim
[
i
].
lpadding
<<
" "
<<
input_tensordef
.
dim
[
i
].
valid
;
}
hta_tensor
->
Resize
({
input_tensordef
.
dataLen
});
MACE_CHECK
(
hta_tensor
->
raw_size
()
==
input_tensordef
.
dataLen
);
Tensor
::
MappingGuard
input_guard
(
hta_tensor
.
get
());
input_tensordef
.
fd
=
allocator_
->
rpcmem
()
->
ToFd
(
hta_tensor
->
mutable_data
<
void
>
());
MACE_CHECK
(
hexagon_hta_nn_register_tensor
(
nn_id_
,
&
input_tensordef
)
==
0
);
transformer_
->
SetInputTransformer
(
input_tensordef
.
format
);
input_info_
.
emplace_back
(
input_info
.
name
(),
input_shape
,
input_info
.
data_type
(),
input_info
.
scale
(),
input_info
.
zero_point
(),
std
::
move
(
quantized_tensor
),
std
::
move
(
hta_tensor
));
}
// output info
num_outputs_
=
net_def
.
output_info_size
();
output_info_
.
reserve
(
num_outputs_
);
for
(
const
InputOutputInfo
&
output_info
:
net_def
.
output_info
())
{
output_tensordef_
.
resize
(
num_outputs_
);
for
(
int
index
=
0
;
index
<
num_outputs_
;
++
index
)
{
auto
output_info
=
net_def
.
output_info
(
index
);
std
::
vector
<
index_t
>
output_shape
(
output_info
.
dims
().
begin
(),
output_info
.
dims
().
end
());
while
(
output_shape
.
size
()
<
4
)
{
output_shape
.
insert
(
output_shape
.
begin
(),
1
);
}
output_info_
.
emplace_back
(
output_info
.
name
(),
output_shape
,
output_info
.
data_type
(),
output_info
.
scale
(),
output_info
.
zero_point
(),
make_unique
<
Tensor
>
());
auto
quantized_tensor
=
make_unique
<
Tensor
>
(
allocator_
,
DT_UINT8
);
auto
hta_tensor
=
make_unique
<
Tensor
>
(
allocator_
,
DT_UINT8
);
quantized_tensor
->
SetScale
(
output_info
.
scale
());
quantized_tensor
->
SetZeroPoint
(
output_info
.
zero_point
());
hexagon_hta_nn_hw_tensordef
&
output_tensordef
=
output_tensordef_
[
index
];
memset
(
&
output_tensordef
,
0
,
sizeof
(
output_tensordef
));
MACE_CHECK
(
hexagon_hta_nn_get_memory_layout
(
nn_id_
,
1
,
index
,
&
output_tensordef
)
==
0
);
output_tensordef
.
dataLen
=
output_tensordef
.
batchStride
;
VLOG
(
1
)
<<
output_tensordef
.
format
<<
" "
<<
output_tensordef
.
elementSize
<<
" "
<<
output_tensordef
.
numDims
<<
" "
<<
output_tensordef
.
batchStride
;
for
(
uint32_t
i
=
0
;
i
<
output_tensordef
.
numDims
;
++
i
)
{
VLOG
(
1
)
<<
output_tensordef
.
dim
[
i
].
length
<<
" "
<<
output_tensordef
.
dim
[
i
].
lpadding
<<
" "
<<
output_tensordef
.
dim
[
i
].
valid
;
}
hta_tensor
->
Resize
({
output_tensordef
.
batchStride
});
MACE_CHECK
(
hta_tensor
->
raw_size
()
==
output_tensordef
.
dataLen
);
Tensor
::
MappingGuard
output_guard
(
hta_tensor
.
get
());
output_tensordef
.
fd
=
allocator_
->
rpcmem
()
->
ToFd
(
hta_tensor
->
mutable_data
<
void
>
());
MACE_CHECK
(
hexagon_hta_nn_register_tensor
(
nn_id_
,
&
output_tensordef
)
==
0
);
transformer_
->
SetOutputTransformer
(
output_tensordef
.
format
);
output_info_
.
emplace_back
(
output_info
.
name
(),
output_shape
,
output_info
.
data_type
(),
output_info
.
scale
(),
output_info
.
zero_point
(),
std
::
move
(
quantized_tensor
),
std
::
move
(
hta_tensor
));
VLOG
(
1
)
<<
"OutputInfo: "
<<
"
\n\t
shape: "
<<
output_shape
[
0
]
<<
" "
<<
output_shape
[
1
]
<<
" "
<<
output_shape
[
2
]
<<
" "
<<
output_shape
[
3
]
<<
"
\n\t
type: "
<<
output_info
.
data_type
();
}
int64_t
t1
=
NowMicros
();
MACE_CHECK
(
hexagon_hta_nn_prepare
(
nn_id_
)
==
0
,
"hexagon_nn_prepare failed"
);
int64_t
t2
=
NowMicros
();
VLOG
(
1
)
<<
"Setup time: "
<<
t1
-
t0
<<
" "
<<
t2
-
t1
;
return
true
;
}
...
...
@@ -266,78 +366,41 @@ bool HexagonHTAWrapper::ExecuteGraphNew(
MACE_CHECK
(
num_outputs_
==
static_cast
<
int
>
(
num_outputs
),
"Wrong outputs num"
);
std
::
vector
<
hexagon_hta_nn_tensordef
>
inputs
(
num_inputs
*
kNumMetaData
);
std
::
vector
<
hexagon_hta_nn_tensordef
>
outputs
(
num_outputs
*
kNumMetaData
);
std
::
vector
<
InputOutputMetadata
>
input_metadata
(
num_inputs
);
std
::
vector
<
InputOutputMetadata
>
output_metadata
(
num_outputs
);
for
(
size_t
i
=
0
;
i
<
num_inputs
;
++
i
)
{
const
auto
input_tensor
=
input_tensors
.
at
(
input_info_
[
i
].
name
);
const
auto
&
input_shape
=
input_tensor
->
shape
();
size_t
index
=
i
*
kNumMetaData
;
inputs
[
index
].
batches
=
static_cast
<
uint32_t
>
(
input_shape
[
0
]);
inputs
[
index
].
height
=
static_cast
<
uint32_t
>
(
input_shape
[
1
]);
inputs
[
index
].
width
=
static_cast
<
uint32_t
>
(
input_shape
[
2
]);
inputs
[
index
].
depth
=
static_cast
<
uint32_t
>
(
input_shape
[
3
]);
inputs
[
index
].
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensor
->
raw_data
()));
inputs
[
index
].
dataLen
=
static_cast
<
int
>
(
input_tensor
->
raw_size
());
inputs
[
index
].
data_valid_len
=
static_cast
<
uint32_t
>
(
input_tensor
->
raw_size
());
inputs
[
index
].
unused
=
0
;
input_metadata
[
i
].
Init
(
.0
f
,
.0
f
,
1
);
AddInputMetadata
(
input_metadata
[
i
].
min_val
,
&
inputs
[
index
+
1
]);
AddInputMetadata
(
input_metadata
[
i
].
max_val
,
&
inputs
[
index
+
2
]);
AddInputMetadata
(
input_metadata
[
i
].
needs_quantization
,
&
inputs
[
index
+
3
]);
}
input_tensor
->
SetScale
(
input_info_
[
i
].
scale
);
input_tensor
->
SetZeroPoint
(
input_info_
[
i
].
zero_point
);
MACE_CHECK_SUCCESS
(
transformer_
->
Quantize
(
input_tensors
.
at
(
input_info_
[
i
].
name
),
input_info_
[
i
].
quantized_tensor
.
get
()));
// transform mace output to hexagon output
for
(
size_t
i
=
0
;
i
<
num_outputs
;
++
i
)
{
auto
output_tensor
=
output_tensors
->
at
(
output_info_
[
i
].
name
);
size_t
index
=
i
*
kNumMetaData
;
output_tensor
->
SetDtype
(
output_info_
[
i
].
data_type
);
output_tensor
->
Resize
(
output_info_
[
i
].
shape
);
outputs
[
index
].
data
=
reinterpret_cast
<
unsigned
char
*>
(
output_tensor
->
raw_mutable_data
());
outputs
[
index
].
dataLen
=
static_cast
<
int
>
(
output_tensor
->
raw_size
());
output_metadata
[
i
].
Init
(
.0
f
,
.0
f
,
1
);
AddOutputMetadata
(
output_metadata
[
i
].
min_val
,
&
outputs
[
index
+
1
]);
AddOutputMetadata
(
output_metadata
[
i
].
max_val
,
&
outputs
[
index
+
2
]);
AddOutputMetadata
(
output_metadata
[
i
].
needs_quantization
,
&
outputs
[
index
+
3
]);
MACE_CHECK_SUCCESS
(
transformer_
->
TransformInput
(
input_info_
[
i
].
quantized_tensor
.
get
(),
input_info_
[
i
].
hta_tensor
.
get
(),
i
));
Tensor
::
MappingGuard
input_guard
(
input_info_
[
i
].
hta_tensor
.
get
());
}
int
res
=
hexagon_hta_nn_execute_new
(
nn_id_
,
inputs
.
data
(),
num_inputs
*
kNumMetaData
,
outputs
.
data
(),
num_outputs
*
kNumMetaData
);
MACE_CHECK
(
res
==
0
,
"execute error"
);
MACE_CHECK
(
hexagon_hta_nn_execute_hw
(
nn_id_
,
input_tensordef_
.
data
(),
num_inputs
,
output_tensordef_
.
data
(),
num_outputs
,
nullptr
,
nullptr
)
==
0
);
for
(
size_t
i
=
0
;
i
<
num_outputs
;
++
i
)
{
size_t
index
=
i
*
kNumMetaData
;
std
::
vector
<
uint32_t
>
output_shape
{
outputs
[
index
].
batches
,
outputs
[
index
].
height
,
outputs
[
index
].
width
,
outputs
[
index
].
depth
};
MACE_CHECK
(
output_shape
.
size
()
==
output_info_
[
i
].
shape
.
size
(),
output_shape
.
size
(),
" vs "
,
output_info_
[
i
].
shape
.
size
(),
" wrong output shape inferred"
);
for
(
size_t
j
=
0
;
j
<
output_shape
.
size
();
++
j
)
{
MACE_CHECK
(
static_cast
<
index_t
>
(
output_shape
[
j
])
==
output_info_
[
i
].
shape
[
j
],
output_shape
[
j
],
" vs "
,
output_info_
[
i
].
shape
[
j
],
" wrong output shape["
,
j
,
"] inferred"
);
{
// To sync cache
Tensor
::
MappingGuard
output_guard
(
output_info_
[
i
].
hta_tensor
.
get
());
}
output_info_
[
i
].
quantized_tensor
->
Resize
(
output_info_
[
i
].
shape
);
transformer_
->
TransformOutput
(
output_info_
[
i
].
hta_tensor
.
get
(),
output_info_
[
i
].
quantized_tensor
.
get
(),
i
);
auto
output_tensor
=
output_tensors
->
at
(
output_info_
[
i
].
name
);
MACE_CHECK
(
static_cast
<
index_t
>
(
outputs
[
index
].
data_valid_len
)
==
output_tensor
->
raw_size
(),
outputs
[
index
].
data_valid_len
,
" vs "
,
output_tensor
->
raw_size
(),
" wrong output bytes inferred."
);
MaceStatus
st
=
transformer_
->
Dequantize
(
output_info_
[
i
].
quantized_tensor
.
get
(),
output_tensor
);
}
return
res
==
0
;
return
true
;
}
}
// namespace mace
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
浏览文件 @
28954099
...
...
@@ -16,18 +16,40 @@
#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_HTA_WRAPPER_H_
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include "mace/utils/thread_pool.h"
#include "mace/core/quantize.h"
#include "mace/core/device.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_hta_transformer.h"
#include "mace/core/tensor.h"
#include "mace/core/device.h"
#include "mace/public/mace.h"
#include "third_party/hta/hta_hexagon_api.h"
namespace
mace
{
struct
HTAInOutInfo
:
public
InOutInfo
{
HTAInOutInfo
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
DataType
data_type
,
const
float
scale
,
const
int32_t
zero_point
,
std
::
unique_ptr
<
Tensor
>
quantized_tensor
,
std
::
unique_ptr
<
Tensor
>
hta_tensor
)
:
InOutInfo
(
name
,
shape
,
data_type
),
scale
(
scale
),
zero_point
(
zero_point
),
quantized_tensor
(
std
::
move
(
quantized_tensor
)),
hta_tensor
(
std
::
move
(
hta_tensor
))
{}
float
scale
;
int32_t
zero_point
;
std
::
unique_ptr
<
Tensor
>
quantized_tensor
;
std
::
unique_ptr
<
Tensor
>
hta_tensor
;
};
class
HexagonHTAWrapper
:
public
HexagonControlWrapper
{
public:
explicit
HexagonHTAWrapper
(
Device
*
device
);
...
...
@@ -50,7 +72,12 @@ class HexagonHTAWrapper : public HexagonControlWrapper {
void
SetDebugLevel
(
int
level
)
override
;
private:
QuantizeUtil
<
float
,
uint8_t
>
quantize_util_
;
Allocator
*
allocator_
;
std
::
vector
<
HTAInOutInfo
>
input_info_
;
std
::
vector
<
HTAInOutInfo
>
output_info_
;
std
::
vector
<
hexagon_hta_nn_hw_tensordef
>
input_tensordef_
;
std
::
vector
<
hexagon_hta_nn_hw_tensordef
>
output_tensordef_
;
std
::
unique_ptr
<
HexagonHTATranformerBase
>
transformer_
;
MACE_DISABLE_COPY_AND_ASSIGN
(
HexagonHTAWrapper
);
};
}
// namespace mace
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
28954099
...
...
@@ -16,9 +16,6 @@
#include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#ifdef MACE_ENABLE_RPCMEM
#include "third_party/rpcmem/rpcmem.h"
#endif // MACE_ENABLE_RPCMEM
namespace
mace
{
namespace
{
...
...
@@ -39,24 +36,10 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
}
}
#ifdef MACE_ENABLE_RPCMEM
std
::
once_flag
ion_prepared
;
void
PrepareQualcommION
()
{
rpcmem_init
();
std
::
atexit
(
rpcmem_deinit
);
}
#endif // MACE_ENABLE_RPCMEM
}
// namespace
OpenCLAllocator
::
OpenCLAllocator
(
OpenCLRuntime
*
opencl_runtime
)
:
opencl_runtime_
(
opencl_runtime
)
{
#ifdef MACE_ENABLE_RPCMEM
if
(
opencl_runtime_
->
ion_type
()
==
IONType
::
QUALCOMM_ION
)
{
std
::
call_once
(
ion_prepared
,
PrepareQualcommION
);
}
#endif // MACE_ENABLE_RPCMEM
}
OpenCLRuntime
*
opencl_runtime
)
:
opencl_runtime_
(
opencl_runtime
)
{}
OpenCLAllocator
::~
OpenCLAllocator
()
{}
...
...
@@ -168,7 +151,7 @@ void OpenCLAllocator::Delete(void *buffer) {
if
(
opencl_runtime_
->
ion_type
()
==
IONType
::
QUALCOMM_ION
)
{
auto
it
=
cl_to_host_map_
.
find
(
buffer
);
MACE_CHECK
(
it
!=
cl_to_host_map_
.
end
(),
"OpenCL buffer not found!"
);
rpcmem_
fre
e
(
it
->
second
);
rpcmem_
.
Delet
e
(
it
->
second
);
cl_to_host_map_
.
erase
(
buffer
);
}
#endif // MACE_ENABLE_RPCMEM
...
...
@@ -184,7 +167,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) {
if
(
opencl_runtime_
->
ion_type
()
==
IONType
::
QUALCOMM_ION
)
{
auto
it
=
cl_to_host_map_
.
find
(
buffer
);
MACE_CHECK
(
it
!=
cl_to_host_map_
.
end
(),
"OpenCL image not found!"
);
rpcmem_
fre
e
(
it
->
second
);
rpcmem_
.
Delet
e
(
it
->
second
);
cl_to_host_map_
.
erase
(
buffer
);
}
#endif // MACE_ENABLE_RPCMEM
...
...
@@ -194,7 +177,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) {
void
*
OpenCLAllocator
::
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
,
bool
finish_cmd_queue
)
const
{
bool
finish_cmd_queue
)
{
MACE_LATENCY_LOGGER
(
1
,
"Map OpenCL buffer"
);
void
*
mapped_ptr
=
nullptr
;
#ifdef MACE_ENABLE_RPCMEM
...
...
@@ -209,7 +192,7 @@ void *OpenCLAllocator::Map(void *buffer,
if
(
opencl_runtime_
->
qcom_host_cache_policy
()
==
CL_MEM_HOST_WRITEBACK_QCOM
)
{
MACE_CHECK
(
rpcmem_
sync_cache
(
mapped_ptr
,
RPCMEM_SYNC_START
)
==
0
);
MACE_CHECK
(
rpcmem_
.
SyncCacheStart
(
mapped_ptr
)
==
0
);
}
}
else
{
#endif // MACE_ENABLE_RPCMEM
...
...
@@ -234,7 +217,7 @@ void *OpenCLAllocator::Map(void *buffer,
void
*
OpenCLAllocator
::
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
,
bool
finish_cmd_queue
)
const
{
bool
finish_cmd_queue
)
{
MACE_LATENCY_LOGGER
(
1
,
"Map OpenCL Image"
);
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Just support map 2d image"
;
void
*
mapped_ptr
=
nullptr
;
...
...
@@ -251,7 +234,7 @@ void *OpenCLAllocator::MapImage(void *buffer,
if
(
opencl_runtime_
->
qcom_host_cache_policy
()
==
CL_MEM_HOST_WRITEBACK_QCOM
)
{
MACE_CHECK
(
rpcmem_
sync_cache
(
mapped_ptr
,
RPCMEM_SYNC_START
)
==
0
);
MACE_CHECK
(
rpcmem_
.
SyncCacheStart
(
mapped_ptr
)
==
0
);
}
}
else
{
#endif // MACE_ENABLE_RPCMEM
...
...
@@ -275,13 +258,13 @@ void *OpenCLAllocator::MapImage(void *buffer,
return
mapped_ptr
;
}
void
OpenCLAllocator
::
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
const
{
void
OpenCLAllocator
::
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
{
MACE_LATENCY_LOGGER
(
1
,
"Unmap OpenCL buffer/Image"
);
#ifdef MACE_ENABLE_RPCMEM
if
(
opencl_runtime_
->
ion_type
()
==
IONType
::
QUALCOMM_ION
)
{
if
(
opencl_runtime_
->
qcom_host_cache_policy
()
==
CL_MEM_HOST_WRITEBACK_QCOM
)
{
MACE_CHECK
(
rpcmem_
sync_cache
(
mapped_ptr
,
RPCMEM_SYNC_END
)
==
0
);
MACE_CHECK
(
rpcmem_
.
SyncCacheEnd
(
mapped_ptr
)
==
0
);
}
}
else
{
#endif // MACE_ENABLE_RPCMEM
...
...
@@ -301,17 +284,20 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const {
bool
OpenCLAllocator
::
OnHost
()
const
{
return
false
;
}
#ifdef MACE_ENABLE_RPCMEM
Rpcmem
*
OpenCLAllocator
::
rpcmem
()
{
return
&
rpcmem_
;
}
void
OpenCLAllocator
::
CreateQualcommBufferIONHostPtr
(
const
size_t
nbytes
,
cl_mem_ion_host_ptr
*
ion_host
)
{
void
*
host
=
rpcmem_alloc
(
RPCMEM_HEAP_ID_SYSTEM
,
RPCMEM_FLAG_CACHED
,
nbytes
+
opencl_runtime_
->
qcom_ext_mem_padding
());
void
*
host
=
rpcmem_
.
New
(
nbytes
+
opencl_runtime_
->
qcom_ext_mem_padding
());
MACE_CHECK_NOTNULL
(
host
);
auto
host_addr
=
reinterpret_cast
<
std
::
uintptr_t
>
(
host
);
auto
page_size
=
opencl_runtime_
->
qcom_page_size
();
MACE_CHECK
(
host_addr
%
page_size
==
0
,
"ION memory address: "
,
host_addr
,
" must be aligned to page size: "
,
page_size
);
int
fd
=
rpcmem_
to_f
d
(
host
);
int
fd
=
rpcmem_
.
ToF
d
(
host
);
MACE_CHECK
(
fd
>=
0
,
"Invalid rpcmem file descriptor: "
,
fd
);
ion_host
->
ext_host_ptr
.
allocation_type
=
CL_MEM_ION_HOST_PTR_QCOM
;
...
...
mace/core/runtime/opencl/opencl_allocator.h
浏览文件 @
28954099
...
...
@@ -48,17 +48,21 @@ class OpenCLAllocator : public Allocator {
void
*
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
,
bool
finish_cmd_queue
)
const
override
;
bool
finish_cmd_queue
)
override
;
void
*
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
,
bool
finish_cmd_queue
)
const
override
;
bool
finish_cmd_queue
)
override
;
void
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
const
override
;
void
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
override
;
bool
OnHost
()
const
override
;
#ifdef MACE_ENABLE_RPCMEM
Rpcmem
*
rpcmem
()
override
;
#endif // MACE_ENABLE_RPCMEM
private:
#ifdef MACE_ENABLE_RPCMEM
void
CreateQualcommBufferIONHostPtr
(
const
size_t
nbytes
,
...
...
@@ -69,6 +73,7 @@ class OpenCLAllocator : public Allocator {
cl_mem_ion_host_ptr
*
ion_host
);
std
::
unordered_map
<
void
*
,
void
*>
cl_to_host_map_
;
Rpcmem
rpcmem_
;
#endif // MACE_ENABLE_RPCMEM
OpenCLRuntime
*
opencl_runtime_
;
};
...
...
mace/
ops/opencl/
helper.cc
→
mace/
core/runtime/opencl/opencl_
helper.cc
浏览文件 @
28954099
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include <algorithm>
#include <string>
...
...
@@ -22,7 +22,6 @@
#include "mace/utils/math.h"
namespace
mace
{
namespace
ops
{
std
::
vector
<
index_t
>
FormatBufferShape
(
const
std
::
vector
<
index_t
>
&
buffer_shape
,
...
...
@@ -59,8 +58,10 @@ std::string DtToCLDt(const DataType dt) {
return
"float"
;
case
DT_HALF
:
return
"half"
;
case
DT_UINT8
:
return
"uchar"
;
default:
LOG
(
FATAL
)
<<
"Unsupported data type
"
;
LOG
(
FATAL
)
<<
"Unsupported data type
: "
<<
dt
;
return
""
;
}
}
...
...
@@ -365,5 +366,4 @@ MaceStatus TuningOrRun2DKernel(OpenCLRuntime *runtime,
return
MaceStatus
::
MACE_SUCCESS
;
}
}
// namespace ops
}
// namespace mace
mace/
ops/opencl/
helper.h
→
mace/
core/runtime/opencl/opencl_
helper.h
浏览文件 @
28954099
...
...
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_
OPS
_OPENCL_HELPER_H_
#define MACE_
OPS
_OPENCL_HELPER_H_
#ifndef MACE_
CORE_RUNTIME_OPENCL
_OPENCL_HELPER_H_
#define MACE_
CORE_RUNTIME_OPENCL
_OPENCL_HELPER_H_
#include <memory>
#include <string>
...
...
@@ -30,7 +30,6 @@
#include "mace/utils/math.h"
namespace
mace
{
namespace
ops
{
// oorc for 'Out Of Range Check'
#define MACE_OUT_OF_RANGE_DEFINITION \
std::shared_ptr<BufferBase> oorc_flag;
...
...
@@ -161,6 +160,5 @@ std::vector<uint32_t> Default3DLocalWS(OpenCLRuntime *runtime,
const
uint32_t
*
gws
,
const
uint32_t
kwg_size
);
}
// namespace ops
}
// namespace mace
#endif // MACE_
OPS
_OPENCL_HELPER_H_
#endif // MACE_
CORE_RUNTIME_OPENCL
_OPENCL_HELPER_H_
mace/libmace/BUILD.bazel
浏览文件 @
28954099
...
...
@@ -21,6 +21,7 @@ load(
"if_opencl_enabled"
,
"if_openmp_enabled"
,
"if_quantize_enabled"
,
"if_rpcmem_enabled"
,
)
cc_library
(
...
...
@@ -47,6 +48,8 @@ cc_library(
"-DMACE_ENABLE_HTA"
,
])
+
if_apu_enabled
([
"-DMACE_ENABLE_APU"
,
])
+
if_rpcmem_enabled
([
"-DMACE_ENABLE_RPCMEM"
,
]),
deps
=
[
"//mace/ops"
,
...
...
mace/libmace/mace.cc
浏览文件 @
28954099
...
...
@@ -520,9 +520,23 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
if
(
device_type_
==
DeviceType
::
HEXAGON
||
device_type_
==
DeviceType
::
HTA
)
{
#ifdef MACE_ENABLE_OPENCL
device_
.
reset
(
new
HexagonDevice
(
device_type_
,
thread_pool_
.
get
(),
make_unique
<
GPUDevice
>
(
config
.
impl_
->
gpu_context
()
->
opencl_tuner
(),
config
.
impl_
->
gpu_context
()
->
opencl_cache_storage
(),
config
.
impl_
->
gpu_priority_hint
(),
config
.
impl_
->
gpu_perf_hint
(),
config
.
impl_
->
gpu_context
()
->
opencl_binary_storage
(),
config
.
impl_
->
num_threads
(),
config
.
impl_
->
cpu_affinity_policy
(),
thread_pool_
.
get
())));
#else
device_
.
reset
(
new
HexagonDevice
(
device_type_
,
thread_pool_
.
get
()));
#endif // MACE_ENABLE_OPENCL
}
#endif
#endif
// defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
#ifdef MACE_ENABLE_APU
if
(
device_type_
==
DeviceType
::
APU
)
{
device_
.
reset
(
new
ApuDevice
(
thread_pool_
.
get
()));
...
...
@@ -579,15 +593,19 @@ MaceStatus MaceEngine::Impl::Init(
<<
MakeString
(
MapKeys
(
output_info_map_
));
}
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
DataType
output_dt
=
output_info_map_
[
output_name
].
data_type
();
Tensor
*
output_tensor
=
ws_
->
CreateTensor
(
output_name
,
device_
->
allocator
(),
output_dt
);
output_tensor
->
set_data_format
(
DataFormat
::
NHWC
);
if
(
device_type_
==
HEXAGON
||
device_type_
==
HTA
)
{
DataType
output_dt
=
output_info_map_
[
output_name
].
data_type
();
Tensor
*
output_tensor
=
ws_
->
CreateTensor
(
output_name
,
device_
->
allocator
(),
output_dt
);
output_tensor
->
set_data_format
(
DataFormat
::
NHWC
);
}
#endif
#if defined(MACE_ENABLE_APU)
Tensor
*
output_tensor
=
ws_
->
CreateTensor
(
output_name
,
device_
->
allocator
(),
DT_FLOAT
);
output_tensor
->
set_data_format
(
DataFormat
::
NHWC
);
if
(
device_type_
==
DeviceType
::
APU
)
{
Tensor
*
output_tensor
=
ws_
->
CreateTensor
(
output_name
,
device_
->
allocator
(),
DT_FLOAT
);
output_tensor
->
set_data_format
(
DataFormat
::
NHWC
);
}
#endif
}
#ifdef MACE_ENABLE_HEXAGON
...
...
mace/ops/opencl/buffer/buffer_transform.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/buffer_type_transform.cc
浏览文件 @
28954099
...
...
@@ -15,7 +15,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/conv_2d.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include <vector>
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
...
...
mace/ops/opencl/buffer/conv_2d_1x1.cc
浏览文件 @
28954099
...
...
@@ -15,7 +15,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/conv_2d_general.cc
浏览文件 @
28954099
...
...
@@ -15,7 +15,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/depthwise_conv2d.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include <vector>
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
...
...
mace/ops/opencl/buffer/pooling.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include <vector>
#include "mace/ops/opencl/buffer/utils.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
...
...
mace/ops/opencl/buffer/reshape.h
浏览文件 @
28954099
...
...
@@ -19,7 +19,7 @@
#include <vector>
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/softmax.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/buffer/utils.cc
浏览文件 @
28954099
...
...
@@ -19,7 +19,7 @@
#include <vector>
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/cl/buffer_transform.cl
浏览文件 @
28954099
...
...
@@ -209,3 +209,202 @@ __kernel void transform_data_type(BUFFER_OUT_OF_RANGE_PARAMS
DATA_TYPE4 input_value = CONVERT4(vload4(out_idx, input + input_offset));
vstore4(input_value, out_idx, output);
}
__kernel void buffer_quantize(BUFFER_OUT_OF_RANGE_PARAMS
__private const int global_size_dim0,
__private const float scale,
__private const int zero_point,
__global float *input,
__private const int input_offset,
__global uchar *output) {
const int out_idx = get_global_id(0);
#ifndef NON_UNIFORM_WORK_GROUP
if (out_idx >= global_size_dim0) {
return;
}
#endif
uchar4 output_value =
convert_uchar4_sat_rte(vload4(out_idx, input) / scale + zero_point);
vstore4(output_value, out_idx, output);
}
__kernel void buffer_dequantize(BUFFER_OUT_OF_RANGE_PARAMS
__private const int global_size_dim0,
__private const float scale,
__private const int zero_point,
__global uchar *input,
__private const int input_offset,
__global float *output) {
const int out_idx = get_global_id(0);
#ifndef NON_UNIFORM_WORK_GROUP
if (out_idx >= global_size_dim0) {
return;
}
#endif
float4 output_value =
convert_float4(convert_int4(vload4(out_idx, input)) - zero_point) * scale;
vstore4(output_value, out_idx, output);
}
// NHWC -> NCHW (W roundup to 32)
__kernel void transform_nhwc_to_nchw32(BUFFER_OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__global uchar *input, // NHWC
__private const int input_offset,
__private const int zero_point,
__global uchar *output,
__private const int batch,
__private const int height,
__private const int width,
__private const int channels) {
const int width_blk_idx = get_global_id(0);
const int h_idx = get_global_id(1);
const int bc_idx = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (width_blk_idx >= global_size_dim0 ||
h_idx >= global_size_dim1 ||
bc_idx >= global_size_dim2) {
return;
}
#endif
const int b_idx = bc_idx / channels;
const int chan_idx = bc_idx - mul24(b_idx, channels);
const int w_idx = width_blk_idx << 2;
const int padded_width = global_size_dim0 << 2;
const int in_offset = mad24(mad24(mad24(b_idx, height, h_idx),
width, w_idx), channels, chan_idx) + input_offset;
const int out_offset = (mad24(mad24(mad24(b_idx, channels, chan_idx),
height, h_idx), padded_width, w_idx));
uchar4 value = zero_point;
if (w_idx + 3 < width) {
value.x = input[in_offset];
value.y = input[in_offset + channels];
value.z = input[in_offset + 2 * channels];
value.w = input[in_offset + 3 * channels];
} else if (w_idx < width) {
const int diff = width - w_idx;
switch(diff) {
case 3:
value.z = input[in_offset + 2 * channels];
case 2:
value.y = input[in_offset + channels];
case 1:
value.x = input[in_offset];
}
}
VSTORE4(value, output, out_offset);
}
// N H ceil(C/32) W 32 -> NHWC
__kernel void transform_d32_to_nhwc(BUFFER_OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__global uchar *input, // D32
__private const int input_offset,
__global uchar *output, // NHWC
__private const int batch,
__private const int height,
__private const int width,
__private const int channels,
__private const int channel_slices) {
const int chan_blk_idx = get_global_id(0);
const int w_idx = get_global_id(1);
const int bh_idx = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (chan_blk_idx >= global_size_dim0 ||
w_idx >= global_size_dim1 ||
bh_idx >= global_size_dim2) {
return;
}
#endif
const int b_idx = bh_idx / height;
const int h_idx = bh_idx - mul24(b_idx, height);
const int c_idx = chan_blk_idx << 2;
const int c_slice = c_idx >> 5;
const int c_slice_idx = c_idx & 31;
const int in_offset = mad24(mad24(mad24(mad24(b_idx, height, h_idx),
channel_slices, c_slice), width, w_idx), 32, c_slice_idx) + input_offset;
const int out_offset = (mad24(mad24(mad24(b_idx, height, h_idx),
width, w_idx), channels, c_idx));
uchar4 value = vload4(0, input + in_offset);
if (c_idx + 3 < channels) {
VSTORE4(value, output, out_offset);
} else {
const int diff = channels - c_idx;
switch(diff) {
case 3:
vstore3(value.xyz, 0, output + out_offset);
break;
case 2:
vstore2(value.xy, 0, output + out_offset);
break;
case 1:
output[out_offset] = value.x;
break;
}
}
}
// NHWC -> N H ceil(C/32) W 32
// Converts a uint8 NHWC tensor into the D32 layout
// ([batch, height, channel_slices, width, 32]) expected by HTA.
// Each work item writes 4 consecutive channel lanes of one
// (batch, height, width) position; channel lanes beyond `channels`
// are padded with `zero_point`.
//
// Fix vs. previous version: the channel-tail branch used
// vload4 on the NHWC input, which could read up to 3 bytes past the
// last valid input element for the final pixel; the tail now loads
// element-wise so no out-of-bounds read occurs.
__kernel void transform_nhwc_to_d32(BUFFER_OUT_OF_RANGE_PARAMS
                                    GLOBAL_WORK_GROUP_SIZE_DIM3
                                    __global uchar *input,  // NHWC
                                    __private const int input_offset,
                                    __private const int zero_point,
                                    __global uchar *output,  // D32
                                    __private const int batch,
                                    __private const int height,
                                    __private const int width,
                                    __private const int channels,
                                    __private const int channel_slices) {
  // Global id 0 packs the width index and the 4-channel group within a
  // 32-channel slice: w_32_idx = (w_idx << 3) | c_slice_blk_idx.
  const int w_32_idx = get_global_id(0);
  const int c_slice = get_global_id(1);
  const int bh_idx = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
  // Guard against rounded-up global sizes on uniform work-group builds.
  if (w_32_idx >= global_size_dim0 ||
      c_slice >= global_size_dim1 ||
      bh_idx >= global_size_dim2) {
    return;
  }
#endif
  const int b_idx = bh_idx / height;
  const int h_idx = bh_idx - mul24(b_idx, height);
  const int w_idx = w_32_idx >> 3;
  const int c_slice_blk_idx = w_32_idx & 7;  // 4-channel group in the slice
  const int c_slice_idx = c_slice_blk_idx << 2;  // channel offset in slice
  const int c_idx = (c_slice << 5) + c_slice_idx;  // absolute channel index
  // Source address in the [b, h, w, c] NHWC layout.
  const int in_offset = (mad24(mad24(mad24(b_idx, height, h_idx),
      width, w_idx), channels, c_idx)) + input_offset;
  // Destination address in the [b, h, c_slice, w, 32] D32 layout.
  const int out_offset = mad24(mad24(mad24(mad24(b_idx, height, h_idx),
      channel_slices, c_slice), width, w_idx), 32, c_slice_idx);
  // Lanes default to zero_point; only valid channels are overwritten.
  uchar4 value = zero_point;
  if (c_idx + 3 < channels) {
    value = vload4(0, input + in_offset);
  } else if (c_idx < channels) {
    // Tail channels: load element-wise to avoid reading past the end of
    // the NHWC input buffer (vload4 here could touch up to 3 bytes
    // beyond the last valid element). Intentional fallthrough.
    const int diff = channels - c_idx;
    switch (diff) {
      case 3:
        value.z = input[in_offset + 2];
      case 2:
        value.y = input[in_offset + 1];
      case 1:
        value.x = input[in_offset];
    }
  }
  VSTORE4(value, output, out_offset);
}
mace/ops/opencl/image/activation.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/addn.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/batch_norm.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/batch_to_space.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/bias_add.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/buffer_to_image.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/channel_shuffle.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/concat.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/conv_2d.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/conv_2d_1x1.cc
浏览文件 @
28954099
...
...
@@ -15,7 +15,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/conv_2d_3x3.cc
浏览文件 @
28954099
...
...
@@ -15,7 +15,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/math.h"
namespace
mace
{
...
...
mace/ops/opencl/image/conv_2d_general.cc
浏览文件 @
28954099
...
...
@@ -14,7 +14,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/ops/common/activation_type.h"
#include "mace/utils/math.h"
...
...
mace/ops/opencl/image/crop.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/deconv_2d.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/depth_to_space.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/depthwise_conv2d.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/depthwise_deconv2d.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/eltwise.h
浏览文件 @
28954099
...
...
@@ -25,7 +25,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/eltwise_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/fully_connected.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/activation_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/image_to_buffer.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/op_context.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/lpnorm.h
浏览文件 @
28954099
...
...
@@ -17,7 +17,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/ops/opencl/lpnorm.h"
namespace
mace
{
...
...
mace/ops/opencl/image/lstm_cell.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/matmul.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/mvnorm.h
浏览文件 @
28954099
...
...
@@ -20,7 +20,7 @@
#include "mace/core/op_context.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/ops/opencl/mvnorm.h"
namespace
mace
{
...
...
mace/ops/opencl/image/pad.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/common/pad_type.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/pooling.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/reduce.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/ops/common/reduce_type.h"
namespace
mace
{
...
...
mace/ops/opencl/image/reshape.h
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
namespace
mace
{
...
...
mace/ops/opencl/image/resize_bicubic.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/resize_bilinear.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/resize_nearest_neighbor.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/softmax.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/space_to_batch.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/space_to_depth.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/split.h
浏览文件 @
28954099
...
...
@@ -24,7 +24,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/sqrdiff_mean.h
浏览文件 @
28954099
...
...
@@ -23,7 +23,7 @@
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
namespace
mace
{
namespace
ops
{
...
...
mace/ops/opencl/image/winograd_conv2d.cc
浏览文件 @
28954099
...
...
@@ -16,7 +16,7 @@
#include "mace/core/op_context.h"
#include "mace/ops/common/activation_type.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
...
...
mace/tools/mace_run.cc
浏览文件 @
28954099
...
...
@@ -175,9 +175,9 @@ bool RunModel(const std::string &model_name,
if
(
status
!=
MaceStatus
::
MACE_SUCCESS
)
{
LOG
(
WARNING
)
<<
"Set openmp or cpu affinity failed."
;
}
#if
def MACE_ENABLE_OPENCL
#if
defined(MACE_ENABLE_OPENCL) || defined(MACE_ENABLE_HTA)
std
::
shared_ptr
<
GPUContext
>
gpu_context
;
if
(
device_type
==
DeviceType
::
GPU
)
{
if
(
device_type
==
DeviceType
::
GPU
||
device_type
==
DeviceType
::
HTA
)
{
const
char
*
storage_path_ptr
=
getenv
(
"MACE_INTERNAL_STORAGE_PATH"
);
const
std
::
string
storage_path
=
std
::
string
(
storage_path_ptr
==
nullptr
?
...
...
test/ccunit/BUILD.bazel
浏览文件 @
28954099
...
...
@@ -9,6 +9,7 @@ load(
"if_android"
,
"if_android_armv7"
,
"if_hexagon_enabled"
,
"if_hta_enabled"
,
"if_neon_enabled"
,
"if_opencl_enabled"
,
"if_openmp_enabled"
,
...
...
@@ -41,7 +42,9 @@ cc_test(
[
"mace/ops/opencl/*.cc"
,
]
)),
))
+
if_hta_enabled
([
"mace/core/runtime/hexagon/hta_transform_test.cc"
,
]),
copts
=
[
"-Werror"
,
"-Wextra"
,
...
...
@@ -59,6 +62,8 @@ cc_test(
"-DMACE_ENABLE_QUANTIZE"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
,
])
+
if_hta_enabled
([
"-DMACE_ENABLE_HTA"
,
]),
linkopts
=
if_openmp_enabled
([
"-fopenmp"
,
...
...
test/ccunit/CMakeLists.txt
浏览文件 @
28954099
...
...
@@ -7,6 +7,10 @@ file(GLOB MACE_CC_TEST_SRCS
mace/libmace/*.cc
)
if
(
MACE_ENABLE_HTA
)
set
(
MACE_CC_TEST_SRCS
${
MACE_CC_TEST_SRCS
}
mace/core/runtime/hexagon/hta_transform_test.cc
)
endif
(
MACE_ENABLE_HTA
)
add_executable
(
mace_cc_test
${
MACE_CC_TEST_SRCS
}
)
target_link_libraries
(
mace_cc_test PUBLIC
mace_cc_test_utils
...
...
test/ccunit/mace/core/runtime/hexagon/hta_transform_test.cc
0 → 100644
浏览文件 @
28954099
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mace/core/quantize.h"
#include "mace/core/runtime/hexagon/hexagon_hta_transformer.h"
#include "mace/ops/ops_test_util.h"

namespace mace {
namespace ops {
namespace test {

class HTATransformTest : public OpsTestBase {};

namespace {

// Round-trips `input` through the HTA quantizer and dequantizer on
// device D and checks the reconstruction is close to the original.
template <DeviceType D>
void TestHTAQuantizeDequantize(const std::vector<float> &input) {
  float min_val, max_val;
  FindMinMax(input.data(), input.size(), &min_val, &max_val);
  float scale;
  int32_t zero;
  AdjustRange<uint8_t>(min_val, max_val, false, &scale, &zero);

  OpsTestNet net;
  Device *device = OpTestContext::Get()->GetDevice(D);
  net.AddInputFromArray<D, float>(
      "Input", {static_cast<index_t>(input.size())}, input);
  Tensor *input_tensor = net.GetOutput("Input");
  input_tensor->SetScale(scale);
  input_tensor->SetZeroPoint(zero);

  Tensor *quantized_output = net.ws()->CreateTensor(
      "QuantizedOutput", device->allocator(), DT_UINT8);
  Tensor *dequantized_output = net.ws()->CreateTensor(
      "DequantizedOutput", device->allocator(), DT_FLOAT);

  mace::HexagonHTATranformer<D> transformer;
  transformer.Init(device);
  transformer.Quantize(input_tensor, quantized_output);
  transformer.Dequantize(quantized_output, dequantized_output);

  ExpectTensorNear<float>(*input_tensor, *dequantized_output, 0.1);
}

}  // namespace

TEST_F(HTATransformTest, TestHTAQuantize) {
  TestHTAQuantizeDequantize<CPU>({-2, -1, 0, 1, 2, 3, 4});
  TestHTAQuantizeDequantize<GPU>({-2, -1, 0, 1, 2, 3, 4});
}

namespace {

// Runs the HTA input transform for `format` on both the CPU and GPU
// implementations and checks they produce identical bytes.
void TestHTAInputTransform(const std::vector<index_t> &input_shape,
                           const hexagon_hta_hw_layout format) {
  OpsTestNet net;
  Device *device = OpTestContext::Get()->GetDevice(DeviceType::GPU);
  net.AddRandomInput<GPU, uint8_t>("Input", input_shape);
  Tensor *input_tensor = net.GetOutput("Input");
  input_tensor->SetScale(0.1);
  input_tensor->SetZeroPoint(1);

  Tensor *cpu_transformed_tensor = net.ws()->CreateTensor(
      "CpuTransformedOutput", device->allocator(), DT_UINT8);
  Tensor *gpu_transformed_tensor = net.ws()->CreateTensor(
      "GpuTransformedOutput", device->allocator(), DT_UINT8);

  mace::HexagonHTATranformer<CPU> cpu_transformer;
  mace::HexagonHTATranformer<GPU> gpu_transformer;
  cpu_transformer.Init(device);
  gpu_transformer.Init(device);
  cpu_transformer.SetInputTransformer(format);
  gpu_transformer.SetInputTransformer(format);
  cpu_transformer.TransformInput(input_tensor, cpu_transformed_tensor, 0);
  gpu_transformer.TransformInput(input_tensor, gpu_transformed_tensor, 0);
  net.Sync();

  ExpectTensorNear<uint8_t>(*cpu_transformed_tensor, *gpu_transformed_tensor);
}

}  // namespace

TEST_F(HTATransformTest, TestHTAInputTransform) {
  TestHTAInputTransform({1, 15, 33, 2}, HEXAGON_HTA_HW_FORMAT_PLANAR);
  TestHTAInputTransform({1, 19, 31, 3}, HEXAGON_HTA_HW_FORMAT_PLANAR);
  TestHTAInputTransform({1, 224, 224, 3}, HEXAGON_HTA_HW_FORMAT_PLANAR);
  TestHTAInputTransform({1, 19, 31, 3}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAInputTransform({1, 15, 33, 27}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAInputTransform({1, 15, 33, 35}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAInputTransform({1, 224, 224, 3}, HEXAGON_HTA_HW_FORMAT_D32);
}

namespace {

// Runs the HTA output transform (D32 -> NHWC) on both the CPU and GPU
// implementations and checks they produce identical bytes.
void TestHTAOutputTransform(const std::vector<index_t> &output_shape,
                            const hexagon_hta_hw_layout format) {
  index_t batch = output_shape[0];
  index_t height = output_shape[1];
  index_t width = output_shape[2];
  index_t channels = output_shape[3];
  MACE_CHECK(format == HEXAGON_HTA_HW_FORMAT_D32);
  // D32 source shape: [batch, height, ceil(channels / 32), width, 32].
  std::vector<index_t> input_shape{
      batch, height, RoundUpDiv<index_t>(channels, 32), width, 32};

  OpsTestNet net;
  Device *device = OpTestContext::Get()->GetDevice(DeviceType::GPU);
  net.AddRandomInput<GPU, uint8_t>("Input", input_shape);
  Tensor *input_tensor = net.GetOutput("Input");

  Tensor *cpu_transformed_tensor = net.ws()->CreateTensor(
      "CpuTransformedOutput", device->allocator(), DT_UINT8);
  Tensor *gpu_transformed_tensor = net.ws()->CreateTensor(
      "GpuTransformedOutput", device->allocator(), DT_UINT8);
  cpu_transformed_tensor->Resize(output_shape);
  gpu_transformed_tensor->Resize(output_shape);

  mace::HexagonHTATranformer<CPU> cpu_transformer;
  mace::HexagonHTATranformer<GPU> gpu_transformer;
  cpu_transformer.Init(device);
  gpu_transformer.Init(device);
  cpu_transformer.SetOutputTransformer(format);
  gpu_transformer.SetOutputTransformer(format);
  cpu_transformer.TransformOutput(input_tensor, cpu_transformed_tensor, 0);
  gpu_transformer.TransformOutput(input_tensor, gpu_transformed_tensor, 0);
  net.Sync();

  ExpectTensorNear<uint8_t>(*cpu_transformed_tensor, *gpu_transformed_tensor);
}

}  // namespace

TEST_F(HTATransformTest, TestHTAOutputTransform) {
  TestHTAOutputTransform({1, 15, 33, 2}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAOutputTransform({1, 19, 31, 27}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAOutputTransform({1, 19, 31, 35}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAOutputTransform({1, 224, 224, 2}, HEXAGON_HTA_HW_FORMAT_D32);
  TestHTAOutputTransform({1, 384, 384, 3}, HEXAGON_HTA_HW_FORMAT_D32);
}

}  // namespace test
}  // namespace ops
}  // namespace mace
test/ccunit/mace/ops/opencl/out_of_range_check_test.cc
浏览文件 @
28954099
...
...
@@ -21,7 +21,7 @@
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/
ops/opencl/
helper.h"
#include "mace/
core/runtime/opencl/opencl_
helper.h"
#include "mace/utils/memory.h"
namespace
mace
{
...
...
test/ccunit/mace/ops/quantize_test.cc
浏览文件 @
28954099
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/quantize.h"
#include "mace/ops/ops_test_util.h"
namespace
mace
{
...
...
test/ccutils/mace/ops/testing/test_utils.h
浏览文件 @
28954099
...
...
@@ -250,8 +250,24 @@ struct Expector<EXP_TYPE, RES_TYPE, false> {
Tensor
::
MappingGuard
y_mapper
(
&
y
);
auto
a
=
x
.
data
<
EXP_TYPE
>
();
auto
b
=
y
.
data
<
RES_TYPE
>
();
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
ExpectEqual
(
a
[
i
],
b
[
i
]);
if
(
x
.
dim_size
()
==
4
)
{
for
(
int
n
=
0
;
n
<
x
.
dim
(
0
);
++
n
)
{
for
(
int
h
=
0
;
h
<
x
.
dim
(
1
);
++
h
)
{
for
(
int
w
=
0
;
w
<
x
.
dim
(
2
);
++
w
)
{
for
(
int
c
=
0
;
c
<
x
.
dim
(
3
);
++
c
)
{
EXPECT_EQ
(
*
a
,
*
b
)
<<
"with index = ["
<<
n
<<
", "
<<
h
<<
", "
<<
w
<<
", "
<<
c
<<
"]"
;
a
++
;
b
++
;
}
}
}
}
}
else
{
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
EXPECT_EQ
(
a
[
i
],
b
[
i
])
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
}
}
}
...
...
third_party/nnlib/arm64-v8a/libhexagon_controller.so
浏览文件 @
28954099
无法预览此类型文件
third_party/nnlib/armeabi-v7a/libhexagon_controller.so
浏览文件 @
28954099
无法预览此类型文件
third_party/nnlib/hexnn_dsp_controller.h
浏览文件 @
28954099
/*
* Copyright (c) 201
6-201
9, The Linux Foundation. All rights reserved.
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted (subject to the limitations in the
...
...
@@ -42,10 +42,6 @@ extern "C" {
int
hexnn_controller_request_unsigned_pd
();
int
hexnn_controller_init
();
int
hexnn_controller_deinit
();
#ifdef __cplusplus
}
#endif
...
...
third_party/rpcmem/arm64-v8a/rpcmem.a
浏览文件 @
28954099
无法预览此类型文件
third_party/rpcmem/armeabi-v7a/rpcmem.a
浏览文件 @
28954099
无法预览此类型文件
third_party/rpcmem/rpcmem.h
浏览文件 @
28954099
...
...
@@ -6,6 +6,8 @@
#ifndef RPCMEM_H
#define RPCMEM_H
#include <stdint.h>
/**
* RPCMEM_DEFAULT_HEAP
* Dynamically select the heap to use. This should be OK for most use cases.
...
...
@@ -77,16 +79,25 @@
extern
"C"
{
#endif
typedef
struct
rpcmem
rpcmem
;
struct
rpcmem
{
void
*
lst
;
// QList*
void
*
mt
;
// pthread_mutex_t*
int
ionfd
;
int
flag
;
int
ionversion
;
};
/**
* call once to initialize the library
* NOTE: rpcmem_init is no
t
thread safe
* NOTE: rpcmem_init is no
w
thread safe
*/
void
rpcmem_init
(
void
);
void
rpcmem_init
(
rpcmem
*
rm
);
/**
* call once for cleanup
* NOTE: rpcmem_deinit is no
t
thread safe
* NOTE: rpcmem_deinit is no
w
thread safe
*/
void
rpcmem_deinit
(
void
);
void
rpcmem_deinit
(
rpcmem
*
rm
);
/**
* Allocate via ION a buffer of size
...
...
@@ -96,10 +107,10 @@ void rpcmem_deinit(void);
* @retval, 0 on failure, pointer to buffer on success
*
* For example:
* buf = rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size);
* buf = rpcmem_alloc(
rm,
RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size);
*/
void
*
rpcmem_alloc
(
int
heapid
,
unsigned
in
t
flags
,
int
size
);
void
*
rpcmem_alloc
(
rpcmem
*
rm
,
int
heapid
,
uint32_
t
flags
,
int
size
);
/**
* allocate with default settings
...
...
@@ -107,24 +118,24 @@ void* rpcmem_alloc(int heapid, unsigned int flags, int size);
#if !defined(WINNT) && !defined (_WIN32_WINNT)
__attribute__
((
unused
))
#endif
static
__inline
void
*
rpcmem_alloc_def
(
int
size
)
{
return
rpcmem_alloc
(
RPCMEM_DEFAULT_HEAP
,
RPCMEM_DEFAULT_FLAGS
,
size
);
static
__inline
void
*
rpcmem_alloc_def
(
rpcmem
*
rm
,
int
size
)
{
return
rpcmem_alloc
(
rm
,
RPCMEM_DEFAULT_HEAP
,
RPCMEM_DEFAULT_FLAGS
,
size
);
}
/**
* free buffer, ignores invalid buffers
*/
void
rpcmem_free
(
void
*
po
);
void
rpcmem_free
(
rpcmem
*
rm
,
void
*
po
);
/**
* returns associated fd
*/
int
rpcmem_to_fd
(
void
*
po
);
int
rpcmem_to_fd
(
rpcmem
*
rm
,
void
*
po
);
/**
* cache coherency management
*/
int
rpcmem_sync_cache
(
void
*
po
,
unsigned
in
t
flags
);
int
rpcmem_sync_cache
(
rpcmem
*
rm
,
void
*
po
,
uint32_
t
flags
);
#ifdef __cplusplus
}
...
...
tools/bazel-build-standalone-lib.sh
浏览文件 @
28954099
...
...
@@ -21,6 +21,11 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu_dsp
mkdir
-p
$LIB_DIR
/arm64-v8a/cpu_gpu
mkdir
-p
$LIB_DIR
/arm64-v8a/cpu_gpu_apu
if
[[
"
$BUILD_HTA
"
==
"1"
]]
;
then
mkdir
-p
$LIB_DIR
/armeabi-v7a/cpu_gpu_hta
mkdir
-p
$LIB_DIR
/arm64-v8a/cpu_gpu_hta
fi
rm
-rf
$LIB_DIR
/linux-x86-64
mkdir
-p
$LIB_DIR
/linux-x86-64
...
...
@@ -33,6 +38,18 @@ mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries
if
[[
"
$BUILD_HTA
"
==
"1"
]]
;
then
echo
"build shared lib for armeabi-v7a + cpu_gpu_hta"
bazel build
--config
android
--config
optimization mace/libmace:libmace_dynamic
--define
neon
=
true
--define
opencl
=
true
--define
hta
=
true
--define
quantize
=
true
--cpu
=
armeabi-v7a
--define
rpcmem
=
true
cp
bazel-bin/mace/libmace/libmace.so
$LIB_DIR
/armeabi-v7a/cpu_gpu_hta/
cp
third_party/hta/armeabi-v7a/
*
so
$LIB_DIR
/armeabi-v7a/cpu_gpu_hta/
echo
"build shared lib for arm64-v8a + cpu_gpu_hta"
bazel build
--config
android
--config
optimization mace/libmace:libmace_dynamic
--define
neon
=
true
--define
opencl
=
true
--define
hta
=
true
--define
quantize
=
true
--cpu
=
arm64-v8a
--define
rpcmem
=
true
cp
bazel-bin/mace/libmace/libmace.so
$LIB_DIR
/arm64-v8a/cpu_gpu_hta/
cp
third_party/hta/arm64-v8a/
*
so
$LIB_DIR
/arm64-v8a/cpu_gpu_hta/
fi
echo
"build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build
--config
android
--config
optimization mace/libmace:libmace_dynamic
--define
neon
=
true
--define
opencl
=
true
--define
hexagon
=
true
--define
quantize
=
true
--cpu
=
armeabi-v7a
--define
rpcmem
=
true
cp
bazel-bin/mace/libmace/libmace.so
$LIB_DIR
/armeabi-v7a/cpu_gpu_dsp/
...
...
@@ -71,6 +88,18 @@ if [[ "$OSTYPE" != "darwin"* ]];then
fi
# build static libraries
if
[[
"
$BUILD_HTA
"
==
"1"
]]
;
then
echo
"build static lib for armeabi-v7a + cpu_gpu_hta"
bazel build
--config
android
--config
optimization mace/libmace:libmace_static
--config
symbol_hidden
--define
neon
=
true
--define
opencl
=
true
--define
hta
=
true
--define
quantize
=
true
--cpu
=
armeabi-v7a
--define
rpcmem
=
true
cp
bazel-genfiles/mace/libmace/libmace.a
$LIB_DIR
/armeabi-v7a/cpu_gpu_hta/
cp
third_party/hta/armeabi-v7a/
*
so
$LIB_DIR
/armeabi-v7a/cpu_gpu_hta/
echo
"build static lib for arm64-v8a + cpu_gpu_hta"
bazel build
--config
android
--config
optimization mace/libmace:libmace_static
--config
symbol_hidden
--define
neon
=
true
--define
opencl
=
true
--define
hta
=
true
--define
quantize
=
true
--cpu
=
arm64-v8a
--define
rpcmem
=
true
cp
bazel-genfiles/mace/libmace/libmace.a
$LIB_DIR
/arm64-v8a/cpu_gpu_hta/
cp
third_party/hta/arm64-v8a/
*
so
$LIB_DIR
/arm64-v8a/cpu_gpu_hta/
fi
echo
"build static lib for armeabi-v7a + cpu_gpu_dsp"
bazel build
--config
android
--config
optimization mace/libmace:libmace_static
--config
symbol_hidden
--define
neon
=
true
--define
opencl
=
true
--define
hexagon
=
true
--define
quantize
=
true
--cpu
=
armeabi-v7a
--define
rpcmem
=
true
cp
bazel-genfiles/mace/libmace/libmace.a
$LIB_DIR
/armeabi-v7a/cpu_gpu_dsp/
...
...
tools/bazel_adb_run.py
浏览文件 @
28954099
...
...
@@ -100,6 +100,11 @@ def parse_args():
type
=
str2bool
,
default
=
True
,
help
=
"Whether to use rpcmem"
)
parser
.
add_argument
(
"--enable_hta"
,
type
=
str2bool
,
default
=
False
,
help
=
"Whether to use hta"
)
parser
.
add_argument
(
'--address_sanitizer'
,
action
=
"store_true"
,
...
...
@@ -170,6 +175,7 @@ def main(unused_args):
enable_neon
=
FLAGS
.
enable_neon
,
enable_quantize
=
FLAGS
.
enable_quantize
,
enable_rpcmem
=
FLAGS
.
enable_rpcmem
,
enable_hta
=
FLAGS
.
enable_hta
,
address_sanitizer
=
FLAGS
.
address_sanitizer
,
debug_mode
=
FLAGS
.
debug_mode
)
if
FLAGS
.
run_target
:
...
...
tools/converter.py
浏览文件 @
28954099
...
...
@@ -218,7 +218,8 @@ def get_opencl_mode(configs):
YAMLKeyword
.
runtime
,
""
)
runtime_list
.
append
(
model_runtime
.
lower
())
if
RuntimeType
.
gpu
in
runtime_list
or
RuntimeType
.
cpu_gpu
in
runtime_list
:
if
RuntimeType
.
gpu
in
runtime_list
or
RuntimeType
.
cpu_gpu
in
runtime_list
\
or
RuntimeType
.
hta
in
runtime_list
:
return
True
return
False
...
...
tools/python/transform/hexagon_converter.py
浏览文件 @
28954099
...
...
@@ -295,6 +295,20 @@ class HexagonConverter(base_converter.ConverterInterface):
else
:
index
+=
1
if
self
.
_option
.
device
==
DeviceType
.
HTA
.
value
:
# replace QuantizeINPUT_f_to_8 with INPUT
quantize_input_op
.
type
=
HexagonOp
.
INPUT
.
name
del
quantize_input_op
.
output_shape
[
1
:]
del
quantize_input_op
.
output_type
[
1
:]
del
quantize_input_op
.
out_max_byte_size
[
1
:]
# replace first op's input min max with constant
self
.
add_constant_min_max_for_first_op
(
self
.
_model
.
op
[
1
])
# replace DequantizeOUTPUT_8tof with OUTPUT
dequantize_output_op
.
type
=
HexagonOp
.
OUTPUT
.
name
del
dequantize_output_op
.
input
[
1
:]
return
quantize_input_op
.
output
def
add_node_id
(
self
,
model_inputs
):
...
...
tools/sh_commands.py
浏览文件 @
28954099
...
...
@@ -605,18 +605,20 @@ def create_internal_storage_dir(serialno, phone_data_dir):
def
push_depended_so_libs
(
libmace_dynamic_library_path
,
abi
,
phone_data_dir
,
serialno
):
dep_so_libs
=
sh
.
bash
(
os
.
environ
[
"ANDROID_NDK_HOME"
]
+
"/ndk-depends"
,
libmace_dynamic_library_path
)
src_file
=
""
for
dep
in
split_stdout
(
dep_so_libs
):
if
dep
==
"libgnustl_shared.so"
:
src_file
=
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/"
\
"%s/libgnustl_shared.so"
\
%
(
os
.
environ
[
"ANDROID_NDK_HOME"
],
abi
)
elif
dep
==
"libc++_shared.so"
:
src_file
=
"%s/sources/cxx-stl/llvm-libc++/libs/"
\
"%s/libc++_shared.so"
\
%
(
os
.
environ
[
"ANDROID_NDK_HOME"
],
abi
)
src_file
=
"%s/sources/cxx-stl/llvm-libc++/libs/"
\
"%s/libc++_shared.so"
\
%
(
os
.
environ
[
"ANDROID_NDK_HOME"
],
abi
)
try
:
dep_so_libs
=
sh
.
bash
(
os
.
environ
[
"ANDROID_NDK_HOME"
]
+
"/ndk-depends"
,
libmace_dynamic_library_path
)
except
sh
.
ErrorReturnCode_127
:
print
(
"Find no ndk-depends, use default libc++_shared.so"
)
else
:
for
dep
in
split_stdout
(
dep_so_libs
):
if
dep
==
"libgnustl_shared.so"
:
src_file
=
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/"
\
"%s/libgnustl_shared.so"
\
%
(
os
.
environ
[
"ANDROID_NDK_HOME"
],
abi
)
print
(
"push %s to %s"
%
(
src_file
,
phone_data_dir
))
adb_push
(
src_file
,
phone_data_dir
,
serialno
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录