Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
5a2ce097
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5a2ce097
编写于
11月 01, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support quantized matmul of output int32
上级
8d3e9277
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
269 addition
and
105 deletion
+269
-105
mace/kernels/matmul.cc
mace/kernels/matmul.cc
+125
-60
mace/kernels/quantize.cc
mace/kernels/quantize.cc
+6
-4
mace/ops/matmul_test.cc
mace/ops/matmul_test.cc
+119
-23
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+1
-0
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+18
-18
未找到文件。
mace/kernels/matmul.cc
浏览文件 @
5a2ce097
...
@@ -68,7 +68,10 @@ class MatMulOpBase : public Operation {
...
@@ -68,7 +68,10 @@ class MatMulOpBase : public Operation {
};
};
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
MatMulOp
:
public
MatMulOpBase
{
class
MatMulOp
;
template
<
>
class
MatMulOp
<
CPU
,
float
>
:
public
MatMulOpBase
{
public:
public:
explicit
MatMulOp
(
OpConstructContext
*
context
)
explicit
MatMulOp
(
OpConstructContext
*
context
)
:
MatMulOpBase
(
context
)
{}
:
MatMulOpBase
(
context
)
{}
...
@@ -107,9 +110,9 @@ class MatMulOp : public MatMulOpBase {
...
@@ -107,9 +110,9 @@ class MatMulOp : public MatMulOpBase {
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guardb
(
B
);
Tensor
::
MappingGuard
guardb
(
B
);
Tensor
::
MappingGuard
guardc
(
C
);
Tensor
::
MappingGuard
guardc
(
C
);
const
T
*
a_ptr_base
=
A
->
data
<
T
>
();
const
float
*
a_ptr_base
=
A
->
data
<
float
>
();
const
T
*
b_ptr_base
=
B
->
data
<
T
>
();
const
float
*
b_ptr_base
=
B
->
data
<
float
>
();
T
*
c_ptr_base
=
C
->
mutable_data
<
T
>
();
float
*
c_ptr_base
=
C
->
mutable_data
<
float
>
();
const
index_t
height_a
=
A
->
dim
(
rank
-
2
);
const
index_t
height_a
=
A
->
dim
(
rank
-
2
);
const
index_t
width_a
=
A
->
dim
(
rank
-
1
);
const
index_t
width_a
=
A
->
dim
(
rank
-
1
);
...
@@ -147,6 +150,100 @@ class MatMulOp : public MatMulOpBase {
...
@@ -147,6 +150,100 @@ class MatMulOp : public MatMulOpBase {
SGemm
sgemm_
;
SGemm
sgemm_
;
};
};
template
<
gemmlowp
::
MapOrder
AOrder
,
gemmlowp
::
MapOrder
BOrder
,
typename
OutputType
>
class
MatMulFixpointImpl
;
template
<
gemmlowp
::
MapOrder
AOrder
,
gemmlowp
::
MapOrder
BOrder
>
class
MatMulFixpointImpl
<
AOrder
,
BOrder
,
uint8_t
>
{
public:
void
operator
()(
OpContext
*
context
,
const
Tensor
*
A
,
const
Tensor
*
B
,
const
index_t
height
,
const
index_t
K
,
const
index_t
width
,
Tensor
*
C
)
{
auto
gemm_context
=
context
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guardb
(
B
);
Tensor
::
MappingGuard
guardc
(
C
);
auto
a_ptr_base
=
A
->
data
<
uint8_t
>
();
auto
b_ptr_base
=
B
->
data
<
uint8_t
>
();
auto
c_ptr_base
=
C
->
mutable_data
<
uint8_t
>
();
index_t
batch
=
std
::
accumulate
(
A
->
shape
().
begin
(),
A
->
shape
().
end
()
-
2
,
1
,
std
::
multiplies
<
index_t
>
());
index_t
a_size
=
height
*
K
;
index_t
b_size
=
K
*
width
;
index_t
c_size
=
height
*
width
;
const
auto
&
output_pipeline
=
GemmlowpOutputPipeline
::
MakeNoBias
(
A
->
scale
(),
B
->
scale
(),
C
->
scale
(),
C
->
zero_point
());
for
(
index_t
i
=
0
;
i
<
batch
;
++
i
)
{
gemmlowp
::
MatrixMap
<
const
uint8_t
,
AOrder
>
a_matrix
(
a_ptr_base
+
i
*
a_size
,
height
,
K
);
gemmlowp
::
MatrixMap
<
const
uint8_t
,
BOrder
>
b_matrix
(
b_ptr_base
+
i
*
b_size
,
K
,
width
);
gemmlowp
::
MatrixMap
<
uint8_t
,
gemmlowp
::
MapOrder
::
RowMajor
>
c_matrix
(
c_ptr_base
+
i
*
c_size
,
height
,
width
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
uint8_t
,
BitDepthParams
>
(
gemm_context
,
a_matrix
,
b_matrix
,
&
c_matrix
,
-
A
->
zero_point
(),
-
B
->
zero_point
(),
output_pipeline
);
}
}
};
template
<
gemmlowp
::
MapOrder
AOrder
,
gemmlowp
::
MapOrder
BOrder
>
class
MatMulFixpointImpl
<
AOrder
,
BOrder
,
int32_t
>
{
public:
void
operator
()(
OpContext
*
context
,
const
Tensor
*
A
,
const
Tensor
*
B
,
const
index_t
height
,
const
index_t
K
,
const
index_t
width
,
Tensor
*
C
)
{
auto
gemm_context
=
context
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guardb
(
B
);
Tensor
::
MappingGuard
guardc
(
C
);
auto
a_ptr_base
=
A
->
data
<
uint8_t
>
();
auto
b_ptr_base
=
B
->
data
<
uint8_t
>
();
auto
c_ptr_base
=
C
->
mutable_data
<
int32_t
>
();
index_t
batch
=
std
::
accumulate
(
A
->
shape
().
begin
(),
A
->
shape
().
end
()
-
2
,
1
,
std
::
multiplies
<
index_t
>
());
index_t
a_size
=
height
*
K
;
index_t
b_size
=
K
*
width
;
index_t
c_size
=
height
*
width
;
const
auto
output_pipeline
=
std
::
make_tuple
();
for
(
index_t
i
=
0
;
i
<
batch
;
++
i
)
{
gemmlowp
::
MatrixMap
<
const
uint8_t
,
AOrder
>
a_matrix
(
a_ptr_base
+
i
*
a_size
,
height
,
K
);
gemmlowp
::
MatrixMap
<
const
uint8_t
,
BOrder
>
b_matrix
(
b_ptr_base
+
i
*
b_size
,
K
,
width
);
gemmlowp
::
MatrixMap
<
int32_t
,
gemmlowp
::
MapOrder
::
RowMajor
>
c_matrix
(
c_ptr_base
+
i
*
c_size
,
height
,
width
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
int32_t
,
BitDepthParams
>
(
gemm_context
,
a_matrix
,
b_matrix
,
&
c_matrix
,
-
A
->
zero_point
(),
-
B
->
zero_point
(),
output_pipeline
);
}
C
->
SetScale
(
A
->
scale
()
*
B
->
scale
());
C
->
SetZeroPoint
(
0
);
}
};
template
<
>
template
<
>
class
MatMulOp
<
DeviceType
::
CPU
,
uint8_t
>:
public
MatMulOpBase
{
class
MatMulOp
<
DeviceType
::
CPU
,
uint8_t
>:
public
MatMulOpBase
{
public:
public:
...
@@ -182,69 +279,37 @@ class MatMulOp<DeviceType::CPU, uint8_t>: public MatMulOpBase {
...
@@ -182,69 +279,37 @@ class MatMulOp<DeviceType::CPU, uint8_t>: public MatMulOpBase {
constexpr
gemmlowp
::
MapOrder
kRowMajor
=
gemmlowp
::
MapOrder
::
RowMajor
;
constexpr
gemmlowp
::
MapOrder
kRowMajor
=
gemmlowp
::
MapOrder
::
RowMajor
;
constexpr
gemmlowp
::
MapOrder
kColMajor
=
gemmlowp
::
MapOrder
::
ColMajor
;
constexpr
gemmlowp
::
MapOrder
kColMajor
=
gemmlowp
::
MapOrder
::
ColMajor
;
#define MATMUL_IMPL(AOrder, BOrder) \
#define MATMUL_FIXPOINT_IMPL(AOrder, BOrder, OutType) \
MatMulImpl<AOrder, BOrder>(context, A, B, height, K, width, C);
MatMulFixpointImpl<AOrder, BOrder, OutType>()( \
context, A, B, height, K, width, C);
#define MATMUL_FIXPOINT_IMPL_TRANSPOSE_OR_NOT(OutType) \
if (transpose_a_) { \
if (transpose_b_) { \
MATMUL_FIXPOINT_IMPL(kColMajor, kColMajor, OutType); \
} else { \
MATMUL_FIXPOINT_IMPL(kColMajor, kRowMajor, OutType); \
} \
} else { \
if (transpose_b_) { \
MATMUL_FIXPOINT_IMPL(kRowMajor, kColMajor, OutType); \
} else { \
MATMUL_FIXPOINT_IMPL(kRowMajor, kRowMajor, OutType); \
} \
}
if
(
transpose_a_
)
{
if
(
!
operator_def_
->
output_type
().
empty
()
if
(
transpose_b_
)
{
&&
operator_def_
->
output_type
()[
0
]
==
DT_INT32
)
{
MATMUL_IMPL
(
kColMajor
,
kColMajor
);
MATMUL_FIXPOINT_IMPL_TRANSPOSE_OR_NOT
(
int32_t
);
}
else
{
MATMUL_IMPL
(
kColMajor
,
kRowMajor
);
}
}
else
{
}
else
{
if
(
transpose_b_
)
{
MATMUL_FIXPOINT_IMPL_TRANSPOSE_OR_NOT
(
uint8_t
);
MATMUL_IMPL
(
kRowMajor
,
kColMajor
);
}
else
{
MATMUL_IMPL
(
kRowMajor
,
kRowMajor
);
}
}
}
#undef MATMUL_IMPL
#undef MATMUL_FIXPOINT_IMPL_TRANSPOSE_OR_NOT
#undef MATMUL_FIXPOINT_IMPL
return
MaceStatus
::
MACE_SUCCESS
;
return
MaceStatus
::
MACE_SUCCESS
;
}
}
private:
template
<
gemmlowp
::
MapOrder
AOrder
,
gemmlowp
::
MapOrder
BOrder
>
void
MatMulImpl
(
OpContext
*
context
,
const
Tensor
*
A
,
const
Tensor
*
B
,
const
index_t
height
,
const
index_t
K
,
const
index_t
width
,
Tensor
*
C
)
{
auto
gemm_context
=
context
->
device
()
->
cpu_runtime
()
->
GetGemmlowpContext
();
MACE_CHECK_NOTNULL
(
gemm_context
);
Tensor
::
MappingGuard
guarda
(
A
);
Tensor
::
MappingGuard
guardb
(
B
);
Tensor
::
MappingGuard
guardc
(
C
);
auto
a_ptr_base
=
A
->
data
<
uint8_t
>
();
auto
b_ptr_base
=
B
->
data
<
uint8_t
>
();
auto
c_ptr_base
=
C
->
mutable_data
<
uint8_t
>
();
index_t
batch
=
std
::
accumulate
(
A
->
shape
().
begin
(),
A
->
shape
().
end
()
-
2
,
1
,
std
::
multiplies
<
index_t
>
());
index_t
a_size
=
height
*
K
;
index_t
b_size
=
K
*
width
;
index_t
c_size
=
height
*
width
;
const
auto
&
output_pipeline
=
GemmlowpOutputPipeline
::
MakeNoBias
(
A
->
scale
(),
B
->
scale
(),
C
->
scale
(),
C
->
zero_point
());
for
(
index_t
i
=
0
;
i
<
batch
;
++
i
)
{
gemmlowp
::
MatrixMap
<
const
uint8_t
,
AOrder
>
a_matrix
(
a_ptr_base
+
i
*
a_size
,
height
,
K
);
gemmlowp
::
MatrixMap
<
const
uint8_t
,
BOrder
>
b_matrix
(
b_ptr_base
+
i
*
b_size
,
K
,
width
);
gemmlowp
::
MatrixMap
<
uint8_t
,
gemmlowp
::
MapOrder
::
RowMajor
>
c_matrix
(
c_ptr_base
+
i
*
c_size
,
height
,
width
);
using
BitDepthParams
=
gemmlowp
::
L8R8WithLhsNonzeroBitDepthParams
;
gemmlowp
::
GemmWithOutputPipeline
<
uint8_t
,
uint8_t
,
BitDepthParams
>
(
gemm_context
,
a_matrix
,
b_matrix
,
&
c_matrix
,
-
A
->
zero_point
(),
-
B
->
zero_point
(),
output_pipeline
);
}
}
};
};
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
...
...
mace/kernels/quantize.cc
浏览文件 @
5a2ce097
...
@@ -72,8 +72,8 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
...
@@ -72,8 +72,8 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
DequantizeOp
;
class
DequantizeOp
;
template
<
>
template
<
typename
T
>
class
DequantizeOp
<
DeviceType
::
CPU
,
uint8_t
>
:
public
Operation
{
class
DequantizeOp
<
DeviceType
::
CPU
,
T
>
:
public
Operation
{
public:
public:
explicit
DequantizeOp
(
OpConstructContext
*
context
)
explicit
DequantizeOp
(
OpConstructContext
*
context
)
:
Operation
(
context
)
{}
:
Operation
(
context
)
{}
...
@@ -85,9 +85,9 @@ class DequantizeOp<DeviceType::CPU, uint8_t> : public Operation {
...
@@ -85,9 +85,9 @@ class DequantizeOp<DeviceType::CPU, uint8_t> : public Operation {
MACE_RETURN_IF_ERROR
(
output
->
ResizeLike
(
input
));
MACE_RETURN_IF_ERROR
(
output
->
ResizeLike
(
input
));
Tensor
::
MappingGuard
input_guard
(
input
);
Tensor
::
MappingGuard
input_guard
(
input
);
Tensor
::
MappingGuard
output_guard
(
output
);
Tensor
::
MappingGuard
output_guard
(
output
);
const
uint8_t
*
input_data
=
input
->
data
<
uint8_t
>
();
const
T
*
input_data
=
input
->
data
<
T
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
Dequantize
(
input_data
,
Dequantize
<
T
>
(
input_data
,
input
->
size
(),
input
->
size
(),
input
->
scale
(),
input
->
scale
(),
input
->
zero_point
(),
input
->
zero_point
(),
...
@@ -104,6 +104,8 @@ void RegisterQuantize(OpRegistryBase *op_registry) {
...
@@ -104,6 +104,8 @@ void RegisterQuantize(OpRegistryBase *op_registry) {
void
RegisterDequantize
(
OpRegistryBase
*
op_registry
)
{
void
RegisterDequantize
(
OpRegistryBase
*
op_registry
)
{
MACE_REGISTER_OP
(
op_registry
,
"Dequantize"
,
DequantizeOp
,
MACE_REGISTER_OP
(
op_registry
,
"Dequantize"
,
DequantizeOp
,
DeviceType
::
CPU
,
uint8_t
);
DeviceType
::
CPU
,
uint8_t
);
MACE_REGISTER_OP
(
op_registry
,
"Dequantize"
,
DequantizeOp
,
DeviceType
::
CPU
,
int32_t
);
}
}
}
// namespace kernels
}
// namespace kernels
}
// namespace mace
}
// namespace mace
mace/ops/matmul_test.cc
浏览文件 @
5a2ce097
...
@@ -214,12 +214,12 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) {
...
@@ -214,12 +214,12 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) {
}
}
namespace
{
namespace
{
void
Quant
(
const
std
::
vector
<
index_t
>
&
batch
,
void
Quant
OutputUint8
(
const
std
::
vector
<
index_t
>
&
batch
,
const
index_t
height
,
const
index_t
height
,
const
index_t
channels
,
const
index_t
channels
,
const
index_t
out_width
,
const
index_t
out_width
,
const
bool
transpose_a
,
const
bool
transpose_a
,
const
bool
transpose_b
)
{
const
bool
transpose_b
)
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -281,6 +281,7 @@ void Quant(const std::vector<index_t> &batch,
...
@@ -281,6 +281,7 @@ void Quant(const std::vector<index_t> &batch,
.
AddIntArg
(
"transpose_b"
,
transpose_b
?
1
:
0
)
.
AddIntArg
(
"transpose_b"
,
transpose_b
?
1
:
0
)
.
Output
(
"QuantizedOutput"
)
.
Output
(
"QuantizedOutput"
)
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
OutputType
({
DT_UINT8
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
net
.
Setup
(
DeviceType
::
CPU
);
net
.
Setup
(
DeviceType
::
CPU
);
Tensor
*
eq_output
=
net
.
GetTensor
(
"ExpectedQuantizedOutput"
);
Tensor
*
eq_output
=
net
.
GetTensor
(
"ExpectedQuantizedOutput"
);
...
@@ -301,26 +302,121 @@ void Quant(const std::vector<index_t> &batch,
...
@@ -301,26 +302,121 @@ void Quant(const std::vector<index_t> &batch,
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
}
}
void
QuantOutputInt32
(
const
std
::
vector
<
index_t
>
&
batch
,
const
index_t
height
,
const
index_t
channels
,
const
index_t
out_width
,
const
bool
transpose_a
,
const
bool
transpose_b
)
{
// Construct graph
OpsTestNet
net
;
// Add input data
index_t
batch_count
=
std
::
accumulate
(
batch
.
begin
(),
batch
.
end
(),
1
,
std
::
multiplies
<
index_t
>
());
if
(
transpose_a
)
{
net
.
AddRandomInput
<
CPU
,
float
>
(
"A"
,
{
batch_count
,
channels
,
height
});
}
else
{
net
.
AddRandomInput
<
CPU
,
float
>
(
"A"
,
{
batch_count
,
height
,
channels
});
}
if
(
transpose_b
)
{
net
.
AddRandomInput
<
CPU
,
float
>
(
"B"
,
{
batch_count
,
out_width
,
channels
});
}
else
{
net
.
AddRandomInput
<
CPU
,
float
>
(
"B"
,
{
batch_count
,
channels
,
out_width
});
}
OpDefBuilder
(
"MatMul"
,
"MatMulTest"
)
.
Input
(
"A"
)
.
AddIntArg
(
"transpose_a"
,
transpose_a
?
1
:
0
)
.
Input
(
"B"
)
.
AddIntArg
(
"transpose_b"
,
transpose_b
?
1
:
0
)
.
Output
(
"Output"
)
.
AddIntArg
(
"T"
,
DT_FLOAT
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
CPU
);
OpDefBuilder
(
"Quantize"
,
"QuantizeA"
)
.
Input
(
"A"
)
.
Output
(
"QuantizedA"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
AddIntArg
(
"non_zero"
,
true
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"Quantize"
,
"QuantizeB"
)
.
Input
(
"B"
)
.
Output
(
"QuantizedB"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
AddIntArg
(
"non_zero"
,
true
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"MatMul"
,
"QuantizeMatMulTest"
)
.
Input
(
"QuantizedA"
)
.
AddIntArg
(
"transpose_a"
,
transpose_a
?
1
:
0
)
.
Input
(
"QuantizedB"
)
.
AddIntArg
(
"transpose_b"
,
transpose_b
?
1
:
0
)
.
Output
(
"QuantizedOutput"
)
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
OutputType
({
DT_INT32
})
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
.
Output
(
"DequantizedOutput"
)
.
OutputType
({
DT_FLOAT
})
.
AddIntArg
(
"T"
,
DT_INT32
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
// Check
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
}
}
// namespace
}
// namespace
TEST_F
(
MatMulOpTest
,
Quant
)
{
TEST_F
(
MatMulOpTest
,
QuantOutputUint8
)
{
Quant
({
1
},
64
,
128
,
32
,
false
,
false
);
QuantOutputUint8
({
1
},
64
,
128
,
32
,
false
,
false
);
Quant
({
1
},
64
,
32
,
128
,
false
,
false
);
QuantOutputUint8
({
1
},
64
,
32
,
128
,
false
,
false
);
Quant
({
2
,
3
},
64
,
32
,
128
,
false
,
false
);
QuantOutputUint8
({
2
,
3
},
64
,
32
,
128
,
false
,
false
);
Quant
({
1
},
64
,
128
,
32
,
false
,
true
);
QuantOutputUint8
({
1
},
64
,
128
,
32
,
false
,
true
);
Quant
({
1
},
64
,
32
,
128
,
false
,
true
);
QuantOutputUint8
({
1
},
64
,
32
,
128
,
false
,
true
);
Quant
({
2
,
3
},
64
,
32
,
128
,
false
,
true
);
QuantOutputUint8
({
2
,
3
},
64
,
32
,
128
,
false
,
true
);
Quant
({
1
},
64
,
128
,
32
,
true
,
false
);
QuantOutputUint8
({
1
},
64
,
128
,
32
,
true
,
false
);
Quant
({
1
},
64
,
32
,
128
,
true
,
false
);
QuantOutputUint8
({
1
},
64
,
32
,
128
,
true
,
false
);
Quant
({
2
,
3
},
64
,
32
,
128
,
true
,
false
);
QuantOutputUint8
({
2
,
3
},
64
,
32
,
128
,
true
,
false
);
Quant
({
1
},
64
,
128
,
32
,
true
,
true
);
QuantOutputUint8
({
1
},
64
,
128
,
32
,
true
,
true
);
Quant
({
1
},
64
,
32
,
128
,
true
,
true
);
QuantOutputUint8
({
1
},
64
,
32
,
128
,
true
,
true
);
Quant
({
2
,
3
},
64
,
32
,
128
,
true
,
true
);
QuantOutputUint8
({
2
,
3
},
64
,
32
,
128
,
true
,
true
);
// UnAligned
QuantOutputUint8
({
2
},
3
,
3
,
3
,
false
,
false
);
QuantOutputUint8
({
16
},
31
,
61
,
67
,
false
,
true
);
QuantOutputUint8
({
31
},
31
,
61
,
67
,
true
,
false
);
QuantOutputUint8
({
2
,
3
},
31
,
61
,
67
,
true
,
true
);
}
TEST_F
(
MatMulOpTest
,
QuantOutputInt32
)
{
QuantOutputInt32
({
1
},
64
,
128
,
32
,
false
,
false
);
QuantOutputInt32
({
1
},
64
,
32
,
128
,
false
,
false
);
QuantOutputInt32
({
2
,
3
},
64
,
32
,
128
,
false
,
false
);
QuantOutputInt32
({
1
},
64
,
128
,
32
,
false
,
true
);
QuantOutputInt32
({
1
},
64
,
32
,
128
,
false
,
true
);
QuantOutputInt32
({
2
,
3
},
64
,
32
,
128
,
false
,
true
);
QuantOutputInt32
({
1
},
64
,
128
,
32
,
true
,
false
);
QuantOutputInt32
({
1
},
64
,
32
,
128
,
true
,
false
);
QuantOutputInt32
({
2
,
3
},
64
,
32
,
128
,
true
,
false
);
QuantOutputInt32
({
1
},
64
,
128
,
32
,
true
,
true
);
QuantOutputInt32
({
1
},
64
,
32
,
128
,
true
,
true
);
QuantOutputInt32
({
2
,
3
},
64
,
32
,
128
,
true
,
true
);
// UnAligned
// UnAligned
Quant
({
2
},
3
,
3
,
3
,
false
,
false
);
Quant
OutputInt32
({
2
},
3
,
3
,
3
,
false
,
false
);
Quant
({
16
},
31
,
61
,
67
,
false
,
true
);
Quant
OutputInt32
({
16
},
31
,
61
,
67
,
false
,
true
);
Quant
({
31
},
31
,
61
,
67
,
true
,
false
);
Quant
OutputInt32
({
31
},
31
,
61
,
67
,
true
,
false
);
Quant
({
2
,
3
},
31
,
61
,
67
,
true
,
true
);
Quant
OutputInt32
({
2
,
3
},
31
,
61
,
67
,
true
,
true
);
}
}
// TODO(liyin): test transpose after implementing gpu runtime
// TODO(liyin): test transpose after implementing gpu runtime
...
...
mace/python/tools/converter_tool/base_converter.py
浏览文件 @
5a2ce097
...
@@ -219,6 +219,7 @@ class TransformerRule(Enum):
...
@@ -219,6 +219,7 @@ class TransformerRule(Enum):
ADD_OPENCL_INFORMATIONS
=
31
ADD_OPENCL_INFORMATIONS
=
31
FOLD_DECONV_AND_BN
=
32
FOLD_DECONV_AND_BN
=
32
FOLD_SQRDIFF_MEAN
=
33
FOLD_SQRDIFF_MEAN
=
33
TRANSPOSE_MATMUL_WEIGHT
=
34
class
ConverterInterface
(
object
):
class
ConverterInterface
(
object
):
...
...
mace/python/tools/converter_tool/transformer.py
浏览文件 @
5a2ce097
...
@@ -80,6 +80,8 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -80,6 +80,8 @@ class Transformer(base_converter.ConverterInterface):
TransformerRule
.
FOLD_ACTIVATION
:
self
.
fold_activation
,
TransformerRule
.
FOLD_ACTIVATION
:
self
.
fold_activation
,
TransformerRule
.
FOLD_SQRDIFF_MEAN
:
self
.
fold_squared_diff_mean
,
TransformerRule
.
FOLD_SQRDIFF_MEAN
:
self
.
fold_squared_diff_mean
,
TransformerRule
.
TRANSPOSE_FILTERS
:
self
.
transpose_filters
,
TransformerRule
.
TRANSPOSE_FILTERS
:
self
.
transpose_filters
,
TransformerRule
.
TRANSPOSE_MATMUL_WEIGHT
:
self
.
transpose_matmul_weight
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
:
self
.
transpose_data_format
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
:
self
.
transpose_data_format
,
TransformerRule
.
ADD_IN_OUT_TENSOR_INFO
:
TransformerRule
.
ADD_IN_OUT_TENSOR_INFO
:
self
.
add_in_out_tensor_info
,
self
.
add_in_out_tensor_info
,
...
@@ -1258,24 +1260,24 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1258,24 +1260,24 @@ class Transformer(base_converter.ConverterInterface):
if
self
.
_option
.
device
!=
DeviceType
.
CPU
.
value
:
if
self
.
_option
.
device
!=
DeviceType
.
CPU
.
value
:
return
False
return
False
net
=
self
.
_model
net
=
self
.
_model
transpose_arg_names
=
[
MaceKeyword
.
mace_transpose_a_str
,
MaceKeyword
.
mace_transpose_b_str
]
for
op
in
net
.
op
:
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
MatMul
.
name
:
# noqa
if
op
.
type
==
MaceOp
.
MatMul
.
name
:
# noqa
for
i
in
range
(
len
(
op
.
input
)):
rhs
=
op
.
input
[
1
]
input
=
op
.
input
[
i
]
if
rhs
in
self
.
_consts
and
len
(
self
.
_consts
[
rhs
].
dims
)
==
2
:
if
input
in
self
.
_consts
\
arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_transpose_b_str
)
# noqa
and
len
(
self
.
_consts
[
input
].
dims
)
==
2
:
six
.
print_
(
'transpose matmul weight'
)
arg
=
ConverterUtil
.
get_arg
(
op
,
transpose_arg_names
[
i
])
if
arg
is
None
:
if
arg
is
not
None
and
arg
.
i
==
1
:
arg
=
op
.
arg
.
add
()
six
.
print_
(
'convert matmul'
)
arg
.
name
=
MaceKeyword
.
mace_transpose_b_str
filter
=
self
.
_consts
[
input
]
arg
.
i
=
0
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
if
arg
.
i
==
0
:
filter
.
dims
)
filter
=
self
.
_consts
[
rhs
]
filter_data
=
filter_data
.
transpose
(
1
,
0
)
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
)
filter
.
dims
[:]
=
filter_data
.
shape
filter_data
=
filter_data
.
transpose
(
1
,
0
)
arg
.
i
=
0
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
arg
.
i
=
1
def
transpose_filters
(
self
):
def
transpose_filters
(
self
):
net
=
self
.
_model
net
=
self
.
_model
...
@@ -1373,8 +1375,6 @@ class Transformer(base_converter.ConverterInterface):
...
@@ -1373,8 +1375,6 @@ class Transformer(base_converter.ConverterInterface):
filter
.
dims
[:]
=
filter_data
.
shape
filter
.
dims
[:]
=
filter_data
.
shape
transposed_deconv_filter
.
add
(
op
.
input
[
1
])
transposed_deconv_filter
.
add
(
op
.
input
[
1
])
self
.
transpose_matmul_weight
()
return
False
return
False
def
buffer_transform
(
self
,
op
,
input_idx
,
input_type
):
def
buffer_transform
(
self
,
op
,
input_idx
,
input_type
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录