Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
0caade30
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0caade30
编写于
1月 15, 2019
作者:
B
Bin Li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add Reduce for quantized CPU and DSP
上级
c23719f2
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
451 additions
and
25 deletions
+451
-25
mace/ops/quantization_util.cc
mace/ops/quantization_util.cc
+1
-1
mace/ops/quantization_util.h
mace/ops/quantization_util.h
+1
-1
mace/ops/reduce.cc
mace/ops/reduce.cc
+322
-11
mace/ops/reduce_test.cc
mace/ops/reduce_test.cc
+83
-0
mace/python/tools/converter_tool/hexagon_converter.py
mace/python/tools/converter_tool/hexagon_converter.py
+39
-0
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+5
-12
未找到文件。
mace/ops/quantization_util.cc
浏览文件 @
0caade30
// Copyright 2018 Xiaomi, Inc. All rights reserved.
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
mace/ops/quantization_util.h
浏览文件 @
0caade30
// Copyright 2018 Xiaomi, Inc. All rights reserved.
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
mace/ops/reduce.cc
浏览文件 @
0caade30
...
...
@@ -73,6 +73,9 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
const
Tensor
*
input
=
this
->
Input
(
0
);
Tensor
*
output
=
this
->
Output
(
0
);
Simplify
(
input
);
// Use the same scale and zero point with input and output.
output
->
SetScale
(
input
->
scale
());
output
->
SetZeroPoint
(
input
->
zero_point
());
output
->
Resize
(
out_shape_
);
Compute
(
input
,
output
);
return
MaceStatus
::
MACE_SUCCESS
;
...
...
@@ -92,7 +95,8 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
axis_
[
i
]
+
input
->
dim_size
();
auto
df
=
static_cast
<
DataFormat
>
(
Operation
::
GetOptionalArg
<
int
>
(
"data_format"
,
DataFormat
::
DF_NONE
));
if
(
df
==
DataFormat
::
NHWC
&&
input
->
dim_size
()
==
4
)
{
if
(
df
==
DataFormat
::
NHWC
&&
DataTypeToEnum
<
T
>::
value
!=
DT_UINT8
&&
input
->
dim_size
()
==
4
)
{
if
(
index
==
1
||
index
==
2
)
index
=
index
+
1
;
else
if
(
index
==
3
)
index
=
1
;
}
...
...
@@ -132,7 +136,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void
compute_reduce_1
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
void
Reduce1Dims
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
T
tmp
=
0
;
...
...
@@ -166,7 +170,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void
compute_reduce_2
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
void
Reduce2Dims
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
#pragma omp parallel for schedule(runtime)
...
...
@@ -250,7 +254,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void
compute_reduce_3
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
void
Reduce3Dims
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
#pragma omp parallel for collapse(1) schedule(runtime)
...
...
@@ -364,7 +368,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void
compute_reduce_4
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
void
Reduce4Dims
(
const
T
*
input
,
ReduceType
type
,
T
*
output
)
{
if
(
reduce_first_axis_
)
{
if
(
type
==
ReduceType
::
MEAN
)
{
#pragma omp parallel for collapse(2) schedule(runtime)
...
...
@@ -498,7 +502,6 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void
Compute
(
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
::
MappingGuard
input_mapper
(
input
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
...
...
@@ -507,16 +510,16 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
memset
(
output_ptr
,
0
,
output
->
size
()
*
sizeof
(
T
));
switch
(
data_reshape_
.
size
())
{
case
1
:
compute_reduce_1
(
input_ptr
,
reduce_type_
,
output_ptr
);
Reduce1Dims
(
input_ptr
,
reduce_type_
,
output_ptr
);
break
;
case
2
:
compute_reduce_2
(
input_ptr
,
reduce_type_
,
output_ptr
);
Reduce2Dims
(
input_ptr
,
reduce_type_
,
output_ptr
);
break
;
case
3
:
compute_reduce_3
(
input_ptr
,
reduce_type_
,
output_ptr
);
Reduce3Dims
(
input_ptr
,
reduce_type_
,
output_ptr
);
break
;
case
4
:
compute_reduce_4
(
input_ptr
,
reduce_type_
,
output_ptr
);
Reduce4Dims
(
input_ptr
,
reduce_type_
,
output_ptr
);
break
;
default:
MACE_CHECK
(
false
,
"not implemented in mace"
)
...
...
@@ -532,6 +535,311 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
std
::
vector
<
index_t
>
out_shape_
;
};
#ifdef MACE_ENABLE_QUANTIZE
// uint8_t specialization of the one-dimensional reduction.
//
// With reduce_first_axis_ set, the data_reshape_[0] input values are folded
// into output[0]:
//   MEAN - integer sum divided by the element count, with "+ count / 2"
//          added before the division so the quotient is rounded to nearest.
//   MIN  - smallest input value.
//   MAX  - largest input value.
// Any other reduce type hits MACE_NOT_IMPLEMENTED.
// Without reduce_first_axis_ nothing is reduced and the data_reshape_[0]
// input bytes are copied to the output unchanged.
//
// NOTE(review): the MEAN accumulator is 32 bits wide, so it can wrap once more
// than 2^32 / 255 (roughly 16.8M) maximal-valued elements are summed --
// confirm that inputs of that size cannot reach this path.
template<>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce1Dims(
    const uint8_t *input, ReduceType type, uint8_t *output) {
  const auto count = data_reshape_[0];

  // Nothing to reduce: pass the data straight through.
  if (!reduce_first_axis_) {
    memcpy(output, input, count * sizeof(uint8_t));
    return;
  }

  switch (type) {
    case ReduceType::MEAN: {
      uint32_t sum = 0;
      for (int idx = 0; idx < count; ++idx) {
        sum += input[idx];
      }
      output[0] = static_cast<uint8_t>((sum + count / 2) / count);
      break;
    }
    case ReduceType::MIN: {
      uint8_t lowest = input[0];
      for (int idx = 1; idx < count; ++idx) {
        if (input[idx] < lowest) {
          lowest = input[idx];
        }
      }
      output[0] = lowest;
      break;
    }
    case ReduceType::MAX: {
      uint8_t highest = input[0];
      for (int idx = 1; idx < count; ++idx) {
        if (highest < input[idx]) {
          highest = input[idx];
        }
      }
      output[0] = highest;
      break;
    }
    default:
      MACE_NOT_IMPLEMENTED;
      break;
  }
}
// uint8_t specialization of the two-dimensional reduction.
//
// The input is addressed as a row-major [rows, cols] array, where
// rows = data_reshape_[0] and cols = data_reshape_[1].
//   reduce_first_axis_ set   : axis 0 is reduced, producing `cols` outputs.
//   reduce_first_axis_ unset : axis 1 is reduced, producing `rows` outputs.
// MEAN adds half of the reduced extent before the integer division so the
// quotient is rounded to nearest; MIN/MAX keep the extreme value. Any other
// reduce type hits MACE_NOT_IMPLEMENTED.
template<>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
    const uint8_t *input, ReduceType type, uint8_t *output) {
  const auto rows = data_reshape_[0];
  const auto cols = data_reshape_[1];

  if (reduce_first_axis_) {
    // Collapse the rows: one result per column.
    switch (type) {
      case ReduceType::MEAN: {
#pragma omp parallel for schedule(runtime)
        for (int c = 0; c < cols; ++c) {
          uint32_t sum = 0;
          for (int r = 0; r < rows; ++r) {
            sum += input[r * cols + c];
          }
          output[c] = static_cast<uint8_t>((sum + rows / 2) / rows);
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for schedule(runtime)
        for (int c = 0; c < cols; ++c) {
          uint8_t lowest = input[c];
          for (int r = 1; r < rows; ++r) {
            lowest = std::min(lowest, input[r * cols + c]);
          }
          output[c] = lowest;
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for schedule(runtime)
        for (int c = 0; c < cols; ++c) {
          uint8_t highest = input[c];
          for (int r = 1; r < rows; ++r) {
            highest = std::max(highest, input[r * cols + c]);
          }
          output[c] = highest;
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  } else {
    // Collapse the columns: one result per row.
    switch (type) {
      case ReduceType::MEAN: {
#pragma omp parallel for schedule(runtime)
        for (int r = 0; r < rows; ++r) {
          const uint8_t *row = input + r * cols;
          uint32_t sum = 0;
          for (int c = 0; c < cols; ++c) {
            sum += row[c];
          }
          output[r] = static_cast<uint8_t>((sum + cols / 2) / cols);
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for schedule(runtime)
        for (int r = 0; r < rows; ++r) {
          const uint8_t *row = input + r * cols;
          uint8_t lowest = row[0];
          for (int c = 1; c < cols; ++c) {
            lowest = std::min(lowest, row[c]);
          }
          output[r] = lowest;
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for schedule(runtime)
        for (int r = 0; r < rows; ++r) {
          const uint8_t *row = input + r * cols;
          uint8_t highest = row[0];
          for (int c = 1; c < cols; ++c) {
            highest = std::max(highest, row[c]);
          }
          output[r] = highest;
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  }
}
// uint8_t specialization of the three-dimensional reduction.
//
// The input is addressed as a row-major [n0, n1, n2] array, where
// nK = data_reshape_[K].
//   reduce_first_axis_ set   : axes 0 and 2 are reduced; output holds n1
//                              values indexed by the axis-1 position.
//   reduce_first_axis_ unset : axis 1 is reduced; output is a row-major
//                              [n0, n2] array.
// MEAN adds half of the reduced element count before the integer division so
// the quotient is rounded to nearest; MIN/MAX keep the extreme value. Any
// other reduce type hits MACE_NOT_IMPLEMENTED.
template<>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
    const uint8_t *input, ReduceType type, uint8_t *output) {
  const auto n0 = data_reshape_[0];
  const auto n1 = data_reshape_[1];
  const auto n2 = data_reshape_[2];

  if (reduce_first_axis_) {
    switch (type) {
      case ReduceType::MEAN: {
        // Number of elements folded into each output value.
        const index_t reduced = n0 * n2;
#pragma omp parallel for collapse(1) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          uint32_t sum = 0;
          for (int i2 = 0; i2 < n2; ++i2) {
            for (int i0 = 0; i0 < n0; ++i0) {
              sum += input[(i0 * n1 + i1) * n2 + i2];
            }
          }
          output[i1] = static_cast<uint8_t>((sum + reduced / 2) / reduced);
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for collapse(1) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          // Seed with the element at (0, i1, 0); the loops revisit it.
          uint8_t lowest = input[i1 * n2];
          for (int i2 = 0; i2 < n2; ++i2) {
            for (int i0 = 0; i0 < n0; ++i0) {
              lowest = std::min(lowest, input[(i0 * n1 + i1) * n2 + i2]);
            }
          }
          output[i1] = lowest;
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for collapse(1) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          // Seed with the element at (0, i1, 0); the loops revisit it.
          uint8_t highest = input[i1 * n2];
          for (int i2 = 0; i2 < n2; ++i2) {
            for (int i0 = 0; i0 < n0; ++i0) {
              highest = std::max(highest, input[(i0 * n1 + i1) * n2 + i2]);
            }
          }
          output[i1] = highest;
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  } else {
    switch (type) {
      case ReduceType::MEAN: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            uint32_t sum = 0;
            for (int i1 = 0; i1 < n1; ++i1) {
              sum += input[(i0 * n1 + i1) * n2 + i2];
            }
            output[i0 * n2 + i2] =
                static_cast<uint8_t>((sum + n1 / 2) / n1);
          }
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            // Seed with the element at (i0, 0, i2) and scan the rest.
            uint8_t lowest = input[i0 * n1 * n2 + i2];
            for (int i1 = 1; i1 < n1; ++i1) {
              lowest = std::min(lowest, input[(i0 * n1 + i1) * n2 + i2]);
            }
            output[i0 * n2 + i2] = lowest;
          }
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            // Seed with the element at (i0, 0, i2) and scan the rest.
            uint8_t highest = input[i0 * n1 * n2 + i2];
            for (int i1 = 1; i1 < n1; ++i1) {
              highest = std::max(highest, input[(i0 * n1 + i1) * n2 + i2]);
            }
            output[i0 * n2 + i2] = highest;
          }
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  }
}
// uint8_t specialization of the four-dimensional reduction.
//
// The input is addressed as a row-major [n0, n1, n2, n3] array, where
// nK = data_reshape_[K].
//   reduce_first_axis_ set   : axes 0 and 2 are reduced; output is a
//                              row-major [n1, n3] array.
//   reduce_first_axis_ unset : axes 1 and 3 are reduced; output is a
//                              row-major [n0, n2] array.
// MEAN adds half of the reduced element count before the integer division so
// the quotient is rounded to nearest; MIN/MAX keep the extreme value. Any
// other reduce type hits MACE_NOT_IMPLEMENTED.
template<>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce4Dims(
    const uint8_t *input, ReduceType type, uint8_t *output) {
  const auto n0 = data_reshape_[0];
  const auto n1 = data_reshape_[1];
  const auto n2 = data_reshape_[2];
  const auto n3 = data_reshape_[3];

  if (reduce_first_axis_) {
    switch (type) {
      case ReduceType::MEAN: {
        // Number of elements folded into each output value.
        const index_t reduced = n0 * n2;
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          for (int i3 = 0; i3 < n3; ++i3) {
            uint32_t sum = 0;
            for (int i2 = 0; i2 < n2; ++i2) {
              for (int i0 = 0; i0 < n0; ++i0) {
                sum += input[((i0 * n1 + i1) * n2 + i2) * n3 + i3];
              }
            }
            output[i1 * n3 + i3] =
                static_cast<uint8_t>((sum + reduced / 2) / reduced);
          }
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          for (int i3 = 0; i3 < n3; ++i3) {
            // Seed with the element at (0, i1, 0, i3); the loops revisit it.
            uint8_t lowest = input[i1 * n2 * n3 + i3];
            for (int i2 = 0; i2 < n2; ++i2) {
              for (int i0 = 0; i0 < n0; ++i0) {
                lowest = std::min(
                    lowest, input[((i0 * n1 + i1) * n2 + i2) * n3 + i3]);
              }
            }
            output[i1 * n3 + i3] = lowest;
          }
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i1 = 0; i1 < n1; ++i1) {
          for (int i3 = 0; i3 < n3; ++i3) {
            // Seed with the element at (0, i1, 0, i3); the loops revisit it.
            uint8_t highest = input[i1 * n2 * n3 + i3];
            for (int i2 = 0; i2 < n2; ++i2) {
              for (int i0 = 0; i0 < n0; ++i0) {
                highest = std::max(
                    highest, input[((i0 * n1 + i1) * n2 + i2) * n3 + i3]);
              }
            }
            output[i1 * n3 + i3] = highest;
          }
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  } else {
    switch (type) {
      case ReduceType::MEAN: {
        // Number of elements folded into each output value.
        const index_t reduced = n1 * n3;
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            uint32_t sum = 0;
            for (int i1 = 0; i1 < n1; ++i1) {
              for (int i3 = 0; i3 < n3; ++i3) {
                sum += input[((i0 * n1 + i1) * n2 + i2) * n3 + i3];
              }
            }
            output[i0 * n2 + i2] =
                static_cast<uint8_t>((sum + reduced / 2) / reduced);
          }
        }
        break;
      }
      case ReduceType::MIN: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            // Seed with the element at (i0, 0, i2, 0); the loops revisit it.
            uint8_t lowest = input[(i0 * n1 * n2 + i2) * n3];
            for (int i1 = 0; i1 < n1; ++i1) {
              for (int i3 = 0; i3 < n3; ++i3) {
                lowest = std::min(
                    lowest, input[((i0 * n1 + i1) * n2 + i2) * n3 + i3]);
              }
            }
            output[i0 * n2 + i2] = lowest;
          }
        }
        break;
      }
      case ReduceType::MAX: {
#pragma omp parallel for collapse(2) schedule(runtime)
        for (int i0 = 0; i0 < n0; ++i0) {
          for (int i2 = 0; i2 < n2; ++i2) {
            // Seed with the element at (i0, 0, i2, 0); the loops revisit it.
            uint8_t highest = input[(i0 * n1 * n2 + i2) * n3];
            for (int i1 = 0; i1 < n1; ++i1) {
              for (int i3 = 0; i3 < n3; ++i3) {
                highest = std::max(
                    highest, input[((i0 * n1 + i1) * n2 + i2) * n3 + i3]);
              }
            }
            output[i0 * n2 + i2] = highest;
          }
        }
        break;
      }
      default:
        MACE_NOT_IMPLEMENTED;
        break;
    }
  }
}
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template
<
typename
T
>
class
ReduceOp
<
DeviceType
::
GPU
,
T
>
:
public
ReduceOpBase
{
...
...
@@ -562,7 +870,10 @@ class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
void
RegisterReduce
(
OpRegistryBase
*
op_registry
)
{
MACE_REGISTER_OP
(
op_registry
,
"Reduce"
,
ReduceOp
,
DeviceType
::
CPU
,
float
);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP
(
op_registry
,
"Reduce"
,
ReduceOp
,
DeviceType
::
CPU
,
uint8_t
);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP
(
op_registry
,
"Reduce"
,
ReduceOp
,
DeviceType
::
GPU
,
float
);
...
...
mace/ops/reduce_test.cc
浏览文件 @
0caade30
...
...
@@ -644,6 +644,89 @@ TEST_F(ReduceOpTest, GPURandomHalf) {
RandomTest
<
DeviceType
::
GPU
,
half
>
({
1
,
511
,
561
,
11
},
{
1
,
2
});
}
namespace
{
void
TestQuant
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
&
axis
)
{
auto
func
=
[
&
](
ReduceType
type
)
{
OpsTestNet
net
;
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
input_shape
,
false
,
false
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
input_shape
,
false
,
true
,
true
);
OpDefBuilder
(
"Reduce"
,
"ReduceTest"
)
.
Input
(
"InputNCHW"
)
.
AddIntsArg
(
"axis"
,
axis
)
.
AddIntArg
(
"keepdims"
,
1
)
.
AddIntArg
(
"reduce_type"
,
type
)
.
AddIntArg
(
"data_format"
,
DataFormat
::
NHWC
)
.
Output
(
"OutputNCHW"
)
.
AddIntArg
(
"T"
,
DT_FLOAT
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
CPU
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
OpDefBuilder
(
"Quantize"
,
"QuantizeInput"
)
.
Input
(
"Input"
)
.
Output
(
"QuantizedInput"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
AddIntArg
(
"non_zero"
,
true
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
net
.
AddRandomInput
<
DeviceType
::
CPU
,
uint8_t
>
(
"QuantizedOutput"
,
input_shape
);
OpDefBuilder
(
"Reduce"
,
"ReduceTest"
)
.
Input
(
"QuantizedInput"
)
.
Output
(
"QuantizedOutput"
)
.
AddIntsArg
(
"axis"
,
axis
)
.
AddIntArg
(
"keepdims"
,
1
)
.
AddIntArg
(
"reduce_type"
,
type
)
.
AddIntArg
(
"data_format"
,
DataFormat
::
NHWC
)
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
.
Output
(
"DequantizedOutput"
)
.
OutputType
({
DT_FLOAT
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
// Check
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
};
for
(
ReduceType
type
:
{
MEAN
,
MIN
,
MAX
})
{
func
(
type
);
}
}
}
// namespace
// Runs TestQuant over a fixed list of (input shape, reduce axes) pairs. The
// per-entry labels are carried over from the original test comments.
TEST_F(ReduceOpTest, Quant) {
  struct QuantCase {
    std::vector<index_t> shape;
    std::vector<int> axes;
  };

  const std::vector<QuantCase> cases = {
      {{1, 1, 3, 4}, {2, 3}},      // reduce 1, first axis
      {{1, 4, 4, 320}, {1, 2}},    // reduce 2, first axis
      {{16, 320, 4, 4}, {2, 3}},   // reduce 2, not first axis
      {{1, 4, 323, 4}, {1, 3}},    // reduce 3, first axis
      {{15, 117, 15, 32}, {2}},    // reduce 3, not first axis
      {{4, 323, 4, 4}, {0, 2}},    // reduce 4, first axis
      {{32, 4, 323, 16}, {1, 3}},  // reduce 4, not first axis
  };

  for (const QuantCase &quant_case : cases) {
    TestQuant(quant_case.shape, quant_case.axes);
  }
}
}
// namespace test
}
// namespace ops
}
// namespace mace
mace/python/tools/converter_tool/hexagon_converter.py
浏览文件 @
0caade30
...
...
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
ReduceType
from
mace.python.tools.convert_util
import
mace_check
from
mace.python.tools
import
graph_util
...
...
@@ -63,6 +64,7 @@ class HexagonOps(object):
MaceOp
.
Quantize
.
name
:
HexagonOp
.
QuantizeINPUT_f_to_8
.
name
,
MaceOp
.
Pooling
.
name
:
[
HexagonOp
.
QuantizedAvgPool_8
.
name
,
HexagonOp
.
QuantizedMaxPool_8
.
name
],
MaceOp
.
Reduce
.
name
:
HexagonOp
.
QuantizedAvgPool_8
.
name
,
MaceOp
.
ResizeBilinear
.
name
:
HexagonOp
.
QuantizedResizeBilinear_8
.
name
,
MaceOp
.
SpaceToBatchND
.
name
:
HexagonOp
.
SpaceToBatchND_8
.
name
,
...
...
@@ -222,6 +224,43 @@ class HexagonConverter(base_converter.ConverterInterface):
strides_tensor
.
dims
.
extend
(
[
1
,
strides_arg
.
ints
[
0
],
strides_arg
.
ints
[
1
],
1
])
op
.
input
.
extend
([
window_tensor
.
name
,
strides_tensor
.
name
])
elif
op
.
type
==
MaceOp
.
Reduce
.
name
:
self
.
add_min_max_const_node
(
op
,
op
.
input
[
0
])
reduce_type_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_reduce_type_str
)
mace_check
(
reduce_type_arg
.
i
==
ReduceType
.
MEAN
.
value
,
"Hexagon Reduce only supports Mean now."
)
keep_dims_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_keepdims_str
)
mace_check
(
keep_dims_arg
.
i
==
1
,
"Hexagon Reduce Mean only supports keep dims now."
)
axis_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_axis_str
)
mace_check
(
1
<=
len
(
axis_arg
.
ints
)
<=
2
,
"Hexagon Reduce Mean only supports spatial now."
)
for
i
in
axis_arg
.
ints
:
mace_check
(
1
<=
i
<=
2
,
"Hexagon Reduce Mean only supports spatial now"
)
producer_op_name
,
_
=
get_op_and_port_from_tensor
(
op
.
input
[
0
])
input_dims
=
None
for
producer_op
in
self
.
_model
.
op
:
if
producer_op
.
name
==
producer_op_name
:
input_dims
=
producer_op
.
output_shape
[
0
].
dims
break
mace_check
(
input_dims
is
not
None
,
"Missing input shape."
)
window_tensor
=
self
.
_model
.
tensors
.
add
()
window_tensor
.
name
=
op
.
name
+
'/window:0'
window_tensor
.
data_type
=
mace_pb2
.
DT_INT32
if
len
(
axis_arg
.
ints
)
==
1
:
dim1
,
dim2
=
(
input_dims
[
1
],
1
)
\
if
axis_arg
.
ints
[
0
]
==
1
else
(
1
,
input_dims
[
2
])
else
:
dim1
,
dim2
=
input_dims
[
1
],
input_dims
[
2
]
window_tensor
.
dims
.
extend
([
1
,
dim1
,
dim2
,
1
])
strides_tensor
=
self
.
_model
.
tensors
.
add
()
strides_tensor
.
name
=
op
.
name
+
'/strides:0'
strides_tensor
.
data_type
=
mace_pb2
.
DT_INT32
strides_tensor
.
dims
.
extend
([
1
,
dim1
,
dim2
,
1
])
op
.
input
.
extend
([
window_tensor
.
name
,
strides_tensor
.
name
])
elif
op
.
type
==
MaceOp
.
ResizeBilinear
.
name
:
newdim_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_resize_size_str
)
...
...
mace/python/tools/converter_tool/transformer.py
浏览文件 @
0caade30
...
...
@@ -113,7 +113,6 @@ class Transformer(base_converter.ConverterInterface):
self
.
_consts
=
{}
self
.
_consumers
=
{}
self
.
_producer
=
{}
self
.
_target_data_format
=
DataFormat
.
NHWC
self
.
_quantize_activation_info
=
{}
self
.
_quantized_tensor
=
set
()
...
...
@@ -996,8 +995,7 @@ class Transformer(base_converter.ConverterInterface):
if
arg
.
name
==
MaceKeyword
.
mace_paddings_str
:
mace_check
(
len
(
arg
.
ints
)
==
8
,
"pad dim rank should be 8."
)
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
\
and
self
.
_target_data_format
==
DataFormat
.
NHWC
:
# noqa
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
:
print
(
"Transpose pad args: %s(%s)"
%
(
op
.
name
,
op
.
type
))
self
.
transpose_shape
(
arg
.
ints
,
...
...
@@ -1006,7 +1004,6 @@ class Transformer(base_converter.ConverterInterface):
for
arg
in
op
.
arg
:
if
arg
.
name
==
MaceKeyword
.
mace_axis_str
:
if
(
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
and
self
.
_target_data_format
==
DataFormat
.
NHWC
and
len
(
op
.
output_shape
[
0
].
dims
)
==
4
):
print
(
"Transpose concat/split args: %s(%s)"
%
(
op
.
name
,
op
.
type
))
...
...
@@ -1023,8 +1020,7 @@ class Transformer(base_converter.ConverterInterface):
len
(
input_shape
)
==
2
:
axis_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_axis_str
)
if
axis_arg
.
i
==
1
\
and
self
.
_target_data_format
==
DataFormat
.
NHWC
:
# noqa
if
axis_arg
.
i
==
1
:
axis_arg
.
i
=
3
elif
op
.
type
==
MaceOp
.
Squeeze
.
name
:
...
...
@@ -1041,8 +1037,7 @@ class Transformer(base_converter.ConverterInterface):
for
arg
in
op
.
arg
:
if
arg
.
name
==
MaceKeyword
.
mace_axis_str
:
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
\
and
self
.
_target_data_format
==
DataFormat
.
NHWC
:
# noqa
op
)
==
DataFormat
.
NCHW
:
print
(
"Transpose reduce args: %s(%s)"
%
(
op
.
name
,
op
.
type
))
reduce_axises
=
list
(
arg
.
ints
)
...
...
@@ -1062,15 +1057,12 @@ class Transformer(base_converter.ConverterInterface):
# transpose op output shape
data_format
=
ConverterUtil
.
data_format
(
op
)
if
data_format
is
not
None
\
and
data_format
!=
self
.
_target_data_format
:
and
data_format
!=
DataFormat
.
NHWC
:
print
(
"Transpose output shapes: %s(%s)"
%
(
op
.
name
,
op
.
type
))
for
output_shape
in
op
.
output_shape
:
if
len
(
output_shape
.
dims
)
==
4
:
self
.
transpose_shape
(
output_shape
.
dims
,
[
0
,
2
,
3
,
1
])
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_data_format_str
).
i
=
\
self
.
_target_data_format
.
value
return
False
...
...
@@ -1683,6 +1675,7 @@ class Transformer(base_converter.ConverterInterface):
print
(
"Add default quantize info for ops like Pooling, Softmax"
)
for
op
in
self
.
_model
.
op
:
if
op
.
type
in
[
MaceOp
.
Pooling
.
name
,
MaceOp
.
Reduce
.
name
,
MaceOp
.
Squeeze
.
name
,
MaceOp
.
Reshape
.
name
,
MaceOp
.
ResizeBilinear
.
name
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录