Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Mr.Vain
Mace
提交
c8d5c88e
Mace
项目概览
Mr.Vain
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
c8d5c88e
编写于
1月 06, 2020
作者:
L
luxuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
opt the performance of ResizeNearestNeighbor&Deconv OP
N/A Signed-off-by:
N
Luxuhui
<
luxuhui@xiaomi.com
>
上级
a2f49f02
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
62 addition
and
26 deletion
+62
-26
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
+2
-1
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+7
-7
mace/ops/deconv_2d.cc
mace/ops/deconv_2d.cc
+13
-8
mace/ops/opencl/image/resize_nearest_neighbor.cc
mace/ops/opencl/image/resize_nearest_neighbor.cc
+11
-3
mace/ops/opencl/image/resize_nearest_neighbor.h
mace/ops/opencl/image/resize_nearest_neighbor.h
+1
-0
mace/ops/opencl/resize_nearest_neighbor.h
mace/ops/opencl/resize_nearest_neighbor.h
+3
-0
mace/ops/resize_nearest_neighbor.cc
mace/ops/resize_nearest_neighbor.cc
+3
-2
tools/layers_validate.py
tools/layers_validate.py
+3
-2
tools/python/transform/base_converter.py
tools/python/transform/base_converter.py
+2
-0
tools/python/transform/transformer.py
tools/python/transform/transformer.py
+17
-3
未找到文件。
mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
浏览文件 @
c8d5c88e
...
...
@@ -114,7 +114,8 @@ HexagonDSPWrapper::HexagonDSPWrapper() {
if
(
env_log_execute_time_str
.
empty
())
{
log_execute_time_
=
false
;
}
else
{
log_execute_time_
=
static_cast
<
bool
>
(
std
::
stoi
(
env_log_execute_time_str
));
log_execute_time_
=
static_cast
<
bool
>
(
std
::
atoi
(
env_log_execute_time_str
.
c_str
()));
}
}
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
c8d5c88e
...
...
@@ -74,8 +74,8 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
const
DataType
dt
,
void
**
result
)
const
{
MACE_CHECK
(
image_shape
.
size
()
==
2
,
"Image shape's size must equal 2"
);
VLOG
(
3
)
<<
"Allocate OpenCL image: "
<<
image_shape
[
0
]
<<
", "
<<
image_shape
[
1
]
;
MACE_LATENCY_LOGGER
(
1
,
"Allocate OpenCL image: "
,
image_shape
[
0
],
", "
,
image_shape
[
1
])
;
if
(
ShouldMockRuntimeFailure
())
{
return
MaceStatus
::
MACE_OUT_OF_RESOURCES
;
...
...
@@ -109,7 +109,7 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
}
void
OpenCLAllocator
::
Delete
(
void
*
buffer
)
const
{
VLOG
(
3
)
<<
"Free OpenCL buffer"
;
MACE_LATENCY_LOGGER
(
1
,
"Free OpenCL buffer"
)
;
if
(
buffer
!=
nullptr
)
{
cl
::
Buffer
*
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
delete
cl_buffer
;
...
...
@@ -117,7 +117,7 @@ void OpenCLAllocator::Delete(void *buffer) const {
}
void
OpenCLAllocator
::
DeleteImage
(
void
*
buffer
)
const
{
VLOG
(
3
)
<<
"Free OpenCL image"
;
MACE_LATENCY_LOGGER
(
1
,
"Free OpenCL image"
)
;
if
(
buffer
!=
nullptr
)
{
cl
::
Image2D
*
cl_image
=
static_cast
<
cl
::
Image2D
*>
(
buffer
);
delete
cl_image
;
...
...
@@ -125,7 +125,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) const {
}
void
*
OpenCLAllocator
::
Map
(
void
*
buffer
,
size_t
offset
,
size_t
nbytes
)
const
{
VLOG
(
3
)
<<
"Map OpenCL buffer"
;
MACE_LATENCY_LOGGER
(
1
,
"Map OpenCL buffer"
)
;
auto
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
auto
queue
=
opencl_runtime_
->
command_queue
();
// TODO(heliangliang) Non-blocking call
...
...
@@ -144,7 +144,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
void
*
OpenCLAllocator
::
MapImage
(
void
*
buffer
,
const
std
::
vector
<
size_t
>
&
image_shape
,
std
::
vector
<
size_t
>
*
mapped_image_pitch
)
const
{
VLOG
(
3
)
<<
"Map OpenCL Image"
;
MACE_LATENCY_LOGGER
(
1
,
"Map OpenCL Image"
)
;
MACE_CHECK
(
image_shape
.
size
()
==
2
)
<<
"Just support map 2d image"
;
auto
cl_image
=
static_cast
<
cl
::
Image2D
*>
(
buffer
);
std
::
array
<
size_t
,
3
>
origin
=
{{
0
,
0
,
0
}};
...
...
@@ -164,7 +164,7 @@ void *OpenCLAllocator::MapImage(void *buffer,
}
void
OpenCLAllocator
::
Unmap
(
void
*
buffer
,
void
*
mapped_ptr
)
const
{
VLOG
(
3
)
<<
"Unmap OpenCL buffer/Image"
;
MACE_LATENCY_LOGGER
(
1
,
"Unmap OpenCL buffer/Image"
)
;
auto
cl_buffer
=
static_cast
<
cl
::
Buffer
*>
(
buffer
);
auto
queue
=
opencl_runtime_
->
command_queue
();
cl_int
error
=
queue
.
enqueueUnmapMemObject
(
*
cl_buffer
,
mapped_ptr
,
...
...
mace/ops/deconv_2d.cc
浏览文件 @
c8d5c88e
...
...
@@ -170,8 +170,8 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
template
<
>
class
Deconv2dOp
<
DeviceType
::
GPU
,
float
>
:
public
Deconv2dOpBase
{
public:
explicit
Deconv2dOp
(
OpConstructContext
*
context
)
:
Deconv2dOpBase
(
context
)
{
explicit
Deconv2dOp
(
OpConstructContext
*
context
)
:
Deconv2dOpBase
(
context
),
dim_
(
Operation
::
GetRepeatedArgs
<
index_t
>
(
"dim"
)
)
{
MemoryType
mem_type
=
MemoryType
::
GPU_IMAGE
;
if
(
context
->
GetOpMemoryType
()
==
MemoryType
::
GPU_IMAGE
)
{
kernel_
=
make_unique
<
opencl
::
image
::
Deconv2dKernel
>
();
...
...
@@ -219,12 +219,16 @@ class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
std
::
vector
<
index_t
>
out_shape
;
if
(
output_shape_tensor
)
{
Tensor
::
MappingGuard
out_shape_guard
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
,
"output shape should be 4-dims"
);
out_shape
=
std
::
vector
<
index_t
>
(
output_shape_tensor
->
data
<
int32_t
>
(),
output_shape_tensor
->
data
<
int32_t
>
()
+
4
);
if
(
dim_
.
size
()
<
2
)
{
Tensor
::
MappingGuard
out_shape_guard
(
output_shape_tensor
);
MACE_CHECK
(
output_shape_tensor
->
size
()
==
4
,
"output shape should be 4-dims"
);
out_shape
=
std
::
vector
<
index_t
>
(
output_shape_tensor
->
data
<
int32_t
>
(),
output_shape_tensor
->
data
<
int32_t
>
()
+
4
);
}
else
{
out_shape
=
dim_
;
}
}
std
::
vector
<
int
>
in_paddings
;
std
::
vector
<
int
>
out_paddings
;
...
...
@@ -249,6 +253,7 @@ class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
}
private:
std
::
vector
<
index_t
>
dim_
;
std
::
unique_ptr
<
OpenCLDeconv2dKernel
>
kernel_
;
};
#endif // MACE_ENABLE_OPENCL
...
...
mace/ops/opencl/image/resize_nearest_neighbor.cc
浏览文件 @
c8d5c88e
...
...
@@ -25,14 +25,22 @@ MaceStatus ResizeNearestNeighborKernel::Compute(
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
size
,
const
std
::
vector
<
index_t
>
&
dims
,
Tensor
*
output
)
{
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
in_height
=
input
->
dim
(
1
);
const
index_t
in_width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
Tensor
::
MappingGuard
size_mapper
(
size
);
const
index_t
out_height
=
size
->
data
<
int32_t
>
()[
0
];
const
index_t
out_width
=
size
->
data
<
int32_t
>
()[
1
];
index_t
out_height
=
0
;
index_t
out_width
=
0
;
if
(
dims
.
size
()
<
2
)
{
Tensor
::
MappingGuard
size_mapper
(
size
);
out_height
=
size
->
data
<
int32_t
>
()[
0
];
out_width
=
size
->
data
<
int32_t
>
()[
1
];
}
else
{
out_height
=
dims
[
0
];
out_width
=
dims
[
1
];
}
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
channel_blocks
),
...
...
mace/ops/opencl/image/resize_nearest_neighbor.h
浏览文件 @
c8d5c88e
...
...
@@ -73,6 +73,7 @@ class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
size
,
const
std
::
vector
<
index_t
>
&
dims
,
Tensor
*
output
)
override
;
private:
...
...
mace/ops/opencl/resize_nearest_neighbor.h
浏览文件 @
c8d5c88e
...
...
@@ -15,6 +15,8 @@
#ifndef MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
#define MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
#include <vector>
#include "mace/core/types.h"
#include "mace/public/mace.h"
#include "mace/utils/math.h"
...
...
@@ -31,6 +33,7 @@ class OpenCLResizeNearestNeighborKernel {
OpContext
*
context
,
const
Tensor
*
input
,
const
Tensor
*
size
,
const
std
::
vector
<
index_t
>
&
dims
,
Tensor
*
output
)
=
0
;
MACE_EMPTY_VIRTUAL_DESTRUCTOR
(
OpenCLResizeNearestNeighborKernel
);
};
...
...
mace/ops/resize_nearest_neighbor.cc
浏览文件 @
c8d5c88e
...
...
@@ -145,7 +145,7 @@ template<>
class
ResizeNearestNeighborOp
<
DeviceType
::
GPU
,
float
>
:
public
Operation
{
public:
explicit
ResizeNearestNeighborOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{
:
Operation
(
context
)
,
dim_
(
Operation
::
GetRepeatedArgs
<
index_t
>
(
"dim"
))
{
bool
align_corners
=
Operation
::
GetOptionalArg
<
bool
>
(
"align_corners"
,
false
);
if
(
context
->
GetOpMemoryType
()
==
MemoryType
::
GPU_IMAGE
)
{
...
...
@@ -163,10 +163,11 @@ class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
"input must be 4-dimensional and size must be 1-dimensional."
,
input
->
dim_size
(),
size
->
dim_size
());
return
kernel_
->
Compute
(
context
,
input
,
size
,
output
);
return
kernel_
->
Compute
(
context
,
input
,
size
,
dim_
,
output
);
}
private:
std
::
vector
<
index_t
>
dim_
;
std
::
unique_ptr
<
OpenCLResizeNearestNeighborKernel
>
kernel_
;
};
#endif // MACE_ENABLE_OPENCL
...
...
tools/layers_validate.py
浏览文件 @
c8d5c88e
...
...
@@ -144,8 +144,9 @@ def convert(model_file, output_dir, layers):
output_info
.
data_format
=
data_format
output_info
.
dims
.
extend
(
op
.
output_shape
[
i
].
dims
)
output_info
.
data_type
=
mace_pb2
.
DT_FLOAT
output_info
.
scale
=
op
.
quantize_info
[
0
].
scale
output_info
.
zero_point
=
op
.
quantize_info
[
0
].
zero_point
if
is_quantize
:
output_info
.
scale
=
op
.
quantize_info
[
0
].
scale
output_info
.
zero_point
=
op
.
quantize_info
[
0
].
zero_point
# modify output op
if
is_quantize
:
output_name
=
op
.
output
[
i
]
...
...
tools/python/transform/base_converter.py
浏览文件 @
c8d5c88e
...
...
@@ -324,6 +324,7 @@ class TransformerRule(Enum):
FP16_MATMUL_WEIGHT
=
41
FP16_GATHER_WEIGHT
=
42
QUANTIZE_LARGE_WEIGHTS
=
43
TRANSPOSE_SHAPE_TENSOR_TO_PARAM
=
44
class
ConverterInterface
(
object
):
...
...
@@ -534,6 +535,7 @@ class ConverterOption(object):
TransformerRule
.
TRANSFORM_LSTMCELL_ZEROSTATE
,
TransformerRule
.
TRANSFORM_BASIC_LSTMCELL
,
TransformerRule
.
TRANSPOSE_RESHAPE_AND_FLATTEN
,
TransformerRule
.
TRANSPOSE_SHAPE_TENSOR_TO_PARAM
,
TransformerRule
.
FOLD_RESHAPE
,
TransformerRule
.
TRANSFORM_MATMUL_TO_FC
,
# For StoB -> conv -> BtoS -> BN pattern
...
...
tools/python/transform/transformer.py
浏览文件 @
c8d5c88e
...
...
@@ -99,6 +99,8 @@ class Transformer(base_converter.ConverterInterface):
TransformerRule
.
UPDATE_DATA_FORMAT
:
self
.
update_data_format
,
TransformerRule
.
TRANSPOSE_RESHAPE_AND_FLATTEN
:
self
.
transform_reshape_and_flatten
,
TransformerRule
.
TRANSPOSE_SHAPE_TENSOR_TO_PARAM
:
self
.
transform_shape_tensor_to_param
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
:
self
.
transpose_data_format
,
TransformerRule
.
CHECK_QUANTIZE_INFO
:
self
.
check_quantize_info
,
...
...
@@ -2119,9 +2121,21 @@ class Transformer(base_converter.ConverterInterface):
mace_check
(
False
,
"Only support reshape and flatten"
)
shape_tensor
.
int32_data
.
extend
(
dims
)
op
.
input
.
append
(
shape_tensor
.
name
)
if
len
(
op
.
input
)
==
2
and
dim_arg
is
None
:
if
shape_tensor
is
None
and
op
.
input
[
1
]
in
self
.
_consts
:
shape_tensor
=
self
.
_consts
[
op
.
input
[
1
]]
def
transform_shape_tensor_to_param
(
self
):
kOpTypeInputIdxMap
=
{
MaceOp
.
ResizeNearestNeighbor
.
name
:
1
,
MaceOp
.
Deconv2D
.
name
:
2
,
MaceOp
.
Reshape
.
name
:
1
,
}
net
=
self
.
_model
for
op
in
net
.
op
:
if
op
.
type
not
in
kOpTypeInputIdxMap
:
continue
shape_idx
=
kOpTypeInputIdxMap
[
op
.
type
]
dim_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_dim_str
)
if
len
(
op
.
input
)
>
shape_idx
and
dim_arg
is
None
:
shape_tensor
=
self
.
_consts
[
op
.
input
[
shape_idx
]]
if
shape_tensor
is
not
None
:
dim_arg
=
op
.
arg
.
add
()
dim_arg
.
name
=
MaceKeyword
.
mace_dim_str
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录