magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit ccd6b9a4
Authored on Aug 03, 2020 by zhanyuan

Add fp32 & int8 ops of Matmul(Batchmatmul)

Parent: 201bcdd9
Showing 19 changed files with 769 additions and 75 deletions (+769 -75)
mindspore/lite/src/ops/matmul.cc (+15 -14)
mindspore/lite/src/ops/ops.cc (+2 -0)
mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc (+72 -0)
mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h (+49 -0)
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc (+83 -25)
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h (+9 -10)
mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h (+1 -1)
mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc (+142 -0)
mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h (+47 -0)
mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc (+17 -0)
mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h (+2 -0)
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc (+1 -3)
mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc (+11 -2)
mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h (+1 -1)
mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h (+1 -0)
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h (+20 -1)
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc (+169 -0)
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc (+1 -18)
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc (+126 -0)
mindspore/lite/src/ops/matmul.cc

@@ -33,29 +33,30 @@ int MatMul::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
-  std::vector<int> x_shape = input0->shape();
-  std::vector<int> w_shape = input1->shape();
-  if (x_shape.size() < 2 || w_shape.size() < 2) {
+  std::vector<int> a_shape = input0->shape();
+  std::vector<int> b_shape = input1->shape();
+  if (a_shape.size() < 3 || b_shape.size() < 3) {
     MS_LOG(ERROR) << "inputs shape is invalid";
     return RET_INPUT_TENSOR_ERROR;
   }
+  for (int i = 0; i < a_shape.size() - 2; ++i) {
+    if (a_shape[i] != b_shape[i]) {
+      MS_LOG(ERROR) << "Op MatMul's dimensions must be equal";
+      return RET_INPUT_TENSOR_ERROR;
+    }
+  }
   auto matmul_prim = this->primitive->value_as_MatMul();
   if (matmul_prim->transposeA()) {
-    int tmp = x_shape.back();
-    x_shape[x_shape.size() - 1] = x_shape[x_shape.size() - 2];
-    x_shape[x_shape.size() - 2] = tmp;
+    std::swap(a_shape[a_shape.size() - 1], a_shape[a_shape.size() - 2]);
   }
   if (matmul_prim->transposeB()) {
-    int tmp = w_shape.back();
-    w_shape[w_shape.size() - 1] = w_shape[w_shape.size() - 2];
-    w_shape[w_shape.size() - 2] = tmp;
+    std::swap(b_shape[b_shape.size() - 1], b_shape[b_shape.size() - 2]);
   }
-  auto y_shape_size = std::max(x_shape.size(), w_shape.size());
-  std::vector<int> y_shape(y_shape_size);
-  y_shape = x_shape;
-  y_shape[y_shape_size - 1] = w_shape[w_shape.size() - 1];
-  output->set_shape(y_shape);
+  std::vector<int> c_shape(a_shape);
+  c_shape[c_shape.size() - 1] = b_shape[b_shape.size() - 1];
+  output->set_shape(c_shape);
   output->set_data_type(input0->data_type());
   output->SetFormat(input0->GetFormat());
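For readers tracing the new shape rule, the sketch below restates the InferShape logic above as a standalone function. This is illustration only, not MindSpore code: the function name and bool parameters are invented stand-ins for the flatbuffer's transposeA/transposeB attributes.

#include <algorithm>
#include <cassert>
#include <vector>

std::vector<int> InferBatchMatmulShape(std::vector<int> a, std::vector<int> b,
                                       bool transpose_a, bool transpose_b) {
  assert(a.size() >= 3 && b.size() >= 3);
  // All leading (batch) dimensions must match.
  for (size_t i = 0; i + 2 < a.size(); ++i) assert(a[i] == b[i]);
  // Transpose flags swap only the last two dims.
  if (transpose_a) std::swap(a[a.size() - 1], a[a.size() - 2]);
  if (transpose_b) std::swap(b[b.size() - 1], b[b.size() - 2]);
  // C keeps A's shape except the last dim, which comes from B.
  std::vector<int> c(a);
  c[c.size() - 1] = b[b.size() - 1];
  return c;
}

// InferBatchMatmulShape({3, 2, 8}, {3, 3, 8}, false, true) -> {3, 2, 3},
// matching the "batch" fp32 unit test added at the end of this commit.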
mindspore/lite/src/ops/ops.cc

@@ -139,6 +139,8 @@ Primitive *Primitive::CreatePrimitive(schema::Primitive *primitive) {
       return new lite::SpaceToBatch(const_cast<schema::Primitive *>(primitive));
     case schema::PrimitiveType_QuantDTypeCast:
       return new lite::QuantDTypeCast(const_cast<schema::Primitive *>(primitive));
+    case schema::PrimitiveType_MatMul:
+      return new lite::MatMul(const_cast<schema::Primitive *>(primitive));
     default:
       break;
   }
mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc (new file, mode 100644)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/base/matmul_base.h"
#include "src/runtime/kernel/arm/fp32/matmul.h"
#include "src/runtime/kernel/arm/int8/matmul_int8.h"
#include "src/kernel_factory.h"
#include "include/errorcode.h"
#include "include/context.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_MatMul;

namespace mindspore::kernel {
kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                           const std::vector<lite::tensor::Tensor *> &outputs,
                                           OpParameter *opParameter, const lite::Context *ctx,
                                           const kernel::KernelKey &desc) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
  auto input_tensor = inputs.at(kInputIndex);
  auto data_type = input_tensor->data_type();
  kernel::LiteKernel *kernel = nullptr;
  switch (data_type) {
    case kNumberTypeInt8:
    case kNumberTypeUInt8: {
      kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx);
      if (!kernel) {
        MS_LOG(ERROR) << "kernel is nullptr.";
        return nullptr;
      }
      break;
    }
    case kNumberTypeFloat32: {
      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx);
      if (!kernel) {
        MS_LOG(ERROR) << "kernel is nullptr.";
        return nullptr;
      }
      break;
    }
    default:
      break;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulKernelCreator)
}  // namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h (new file, mode 100644)

/* Copyright 2020 Huawei Technologies Co., Ltd (Apache 2.0 license header identical to matmul_base.cc above) */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_

#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/opclib/matmul.h"

using mindspore::lite::Context;

namespace mindspore::kernel {
class MatmulBaseCPUKernel : public LiteKernel {
 public:
  MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->threadNum) {
    params_ = reinterpret_cast<MatMulParameter *>(opParameter);
  }
  ~MatmulBaseCPUKernel() = default;

  int Init() override { return 0; }
  int ReSize() override { return 0; }
  int Run() override { return 0; }

 protected:
  MatMulParameter *params_;
  int thread_count_;
  int thread_stride_;
  const Context *ctx_;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_MATMUL_BASE_H_
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc

@@ -15,44 +15,102 @@
  */
 #include "src/runtime/kernel/arm/fp32/matmul.h"
 #include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/opclib/fp32/matmul.h"
+#include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"

-using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_MatMul;

 namespace mindspore::kernel {
+MatmulCPUKernel::~MatmulCPUKernel() {
+  ctx_->allocator->Free(a_c8_ptr_);
+  ctx_->allocator->Free(b_r8_ptr_);
+  ctx_->allocator->Free(c_r8x8_ptr_);
+}
+
 int MatmulCPUKernel::ReSize() { return RET_OK; }

-int MatmulCPUKernel::Run() { return RET_OK; }
-
-int MatmulCPUKernel::Init() { return RET_OK; }
+int MatmulCPUKernel::Init() {
+  int batch = 1;
+  auto x_shape = inputs_[0]->shape();
+  auto o_shape = outputs_[0]->shape();
+  for (int i = 0; i < x_shape.size() - 2; ++i) {
+    batch *= x_shape[i];
+  }
+  params_->batch = batch;
+  params_->row_ = o_shape[o_shape.size() - 2];
+  params_->col_ = o_shape[o_shape.size() - 1];
+  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
+  params_->row_8_ = UP_ROUND(params_->row_, 8);
+  params_->col_8_ = UP_ROUND(params_->col_, 8);
+  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
+  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);
+
+  a_c8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(float)));
+  if (!a_c8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(float));
+  b_r8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(float)));
+  if (!b_r8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(float));
+  c_r8x8_ptr_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(float)));
+  if (!c_r8x8_ptr_) {
+    return RET_MEMORY_FAILED;
+  }
+  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(float));
+  return RET_OK;
+}

-kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                               OpParameter *opParameter, const lite::Context *ctx,
-                                               const kernel::KernelKey &desc) {
-  MS_ASSERT(desc.type == schema::PrimitiveType_MatMul);
-  auto *kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new MatmulCPUKernel fail!";
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    return nullptr;
-  }
-  return kernel;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MatMul, CpuMatmulFp32KernelCreator)
+int MatmulCPUKernel::RunImpl(int task_id) {
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
+  if (cur_oc <= 0) {
+    return RET_OK;
+  }
+  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
+  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
+  MatMul(a_c8_ptr_, cur_b, cur_c, NULL, ActType_No, params_->deep_, params_->row_8_, cur_oc * 8);
+  return RET_OK;
+}
+
+int MatmulFloatRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto op = reinterpret_cast<MatmulCPUKernel *>(cdata);
+  auto error_code = op->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "MatmulFp32Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int MatmulCPUKernel::Run() {
+  auto a_ptr = reinterpret_cast<float *>(inputs_[0]->Data());
+  auto b_ptr = reinterpret_cast<float *>(inputs_[1]->Data());
+  auto c_ptr = reinterpret_cast<float *>(outputs_[0]->Data());
+  auto a_stride = params_->row_ * params_->deep_;
+  auto b_stride = params_->deep_ * params_->col_;
+  auto c_stride = params_->row_ * params_->col_;
+  for (int i = 0; i < params_->batch; ++i) {
+    auto cur_a_ptr = a_ptr + i * a_stride;
+    auto cur_b_ptr = b_ptr + i * b_stride;
+    auto cur_c_ptr = c_ptr + i * c_stride;
+    if (params_->a_transpose_) {
+      RowMajor2Row8Major(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
+    } else {
+      RowMajor2Col8Major(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
+    }
+    if (params_->b_transpose_) {
+      RowMajor2Col8Major(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
+    } else {
+      RowMajor2Row8Major(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
+    }
+    LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_);
+    Row8x8Major2RowMajor(c_r8x8_ptr_, cur_c_ptr, params_->row_, params_->col_);
+  }
+  return RET_OK;
+}
 }  // namespace mindspore::kernel
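The fp32 kernel leans on pack routines (RowMajor2Col8Major and friends) that live in opclib/fp32/matmul.cc and are not part of this diff. As an illustration only, the sketch below shows a packing consistent with how Init() sizes a_c8_ptr_ (UP_ROUND(row, 8) rows of deep floats) and with the index mapping of the int8 variant RowMajor2Col8MajorInt8 added later in this commit; the function name is hypothetical and the real fp32 implementation may differ.

constexpr int kC8 = 8;  // tile height used throughout these kernels

// Rows are grouped into tiles of 8; within a tile, the 8 row entries of each
// column are stored contiguously, so an 8x8 micro-kernel can load one column
// of the tile with a single contiguous read.
void PackRowMajorToCol8Major(const float *src, float *dst, int row, int col) {
  for (int r = 0; r < row; ++r) {
    int tile = r / kC8, lane = r % kC8;
    for (int c = 0; c < col; ++c) {
      dst[tile * col * kC8 + c * kC8 + lane] = src[r * col + c];
    }
  }
}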
mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h

@@ -19,27 +19,26 @@
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/arm/opclib/matmul.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"

 namespace mindspore::kernel {
-class MatmulCPUKernel : public LiteKernel {
+class MatmulCPUKernel : public MatmulBaseCPUKernel {
  public:
   explicit MatmulCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                           const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs) {
-    matmul_param_ = reinterpret_cast<MatMulParameter *>(parameter);
-  }
-  ~MatmulCPUKernel() override = default;
+                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+  ~MatmulCPUKernel() override;

   int Init() override;
   int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);

  private:
-  MatMulParameter *matmul_param_;
+  float *a_c8_ptr_;
+  float *b_r8_ptr_;
+  float *c_r8x8_ptr_;
 };
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_MATMUL_H_
mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h

@@ -42,7 +42,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel {
   int RunImpl(int task_id);

  private:
-  FcQuantArg quant_params_;
+  MatmulQuantArg quant_params_;
   int8_t *a_c8_ptr_;
   int8_t *b_r8_ptr_;
   int *c_r8x8_ptr_;
mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc (new file, mode 100644)

/* Copyright 2020 Huawei Technologies Co., Ltd (Apache 2.0 license header identical to matmul_base.cc above) */

#include "src/runtime/kernel/arm/int8/matmul_int8.h"
#include "src/runtime/kernel/arm/opclib/int8/matmul.h"
#include "src/runtime/kernel/arm/opclib/common_func.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
MatmulInt8CPUKernel::~MatmulInt8CPUKernel() {
  ctx_->allocator->Free(a_c8_ptr_);
  ctx_->allocator->Free(b_r8_ptr_);
  ctx_->allocator->Free(c_r8x8_ptr_);
}

int MatmulInt8CPUKernel::Init() {
  int batch = 1;
  auto x_shape = inputs_[0]->shape();
  auto o_shape = outputs_[0]->shape();
  for (int i = 0; i < x_shape.size() - 2; ++i) {
    batch *= x_shape[i];
  }
  params_->batch = batch;
  params_->row_ = o_shape[o_shape.size() - 2];
  params_->col_ = o_shape[o_shape.size() - 1];
  params_->deep_ = params_->a_transpose_ ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
  params_->row_8_ = UP_ROUND(params_->row_, 8);
  params_->col_8_ = UP_ROUND(params_->col_, 8);
  thread_count_ = MSMIN(thread_count_, UP_DIV(params_->col_8_, 8));
  thread_stride_ = UP_DIV(UP_DIV(params_->col_8_, 8), thread_count_);

  a_c8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->row_8_ * params_->deep_ * sizeof(int8_t)));
  if (!a_c8_ptr_) {
    return RET_MEMORY_FAILED;
  }
  memset(a_c8_ptr_, 0, params_->row_8_ * params_->deep_ * sizeof(int8_t));
  b_r8_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(params_->col_8_ * params_->deep_ * sizeof(int8_t)));
  if (!b_r8_ptr_) {
    return RET_MEMORY_FAILED;
  }
  memset(b_r8_ptr_, 0, params_->col_8_ * params_->deep_ * sizeof(int8_t));
  c_r8x8_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(params_->row_8_ * params_->col_8_ * sizeof(int)));
  if (!c_r8x8_ptr_) {
    return RET_MEMORY_FAILED;
  }
  memset(c_r8x8_ptr_, 0, params_->row_8_ * params_->col_8_ * sizeof(int));

  auto input_tensor = inputs_[0];
  auto params = input_tensor->GetQuantParams();
  MS_ASSERT(params.size() == 1);
  quant_params_.input.zp_ = params.front().zeroPoint;
  quant_params_.input.scale_ = params.front().scale;
  auto weight_tensor = inputs_[1];
  params = weight_tensor->GetQuantParams();
  MS_ASSERT(params.size() == 1);
  quant_params_.weight.zp_ = params.front().zeroPoint;
  quant_params_.weight.scale_ = params.front().scale;
  auto output_tensor = outputs_[0];
  params = output_tensor->GetQuantParams();
  MS_ASSERT(params.size() == 1);
  quant_params_.output.zp_ = params.front().zeroPoint;
  quant_params_.output.scale_ = params.front().scale;

  double real_multiplier = quant_params_.input.scale_ * quant_params_.weight.scale_ / quant_params_.output.scale_;
  QuantizeRoundParameter(real_multiplier, &quant_params_.quant_multiplier, &quant_params_.left_shift,
                         &quant_params_.right_shift);
  return RET_OK;
}

int MatmulInt8CPUKernel::ReSize() { return RET_OK; }

int MatmulInt8CPUKernel::RunImpl(int task_id) {
  int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_);
  if (cur_oc <= 0) {
    return RET_OK;
  }
  auto cur_b = b_r8_ptr_ + task_id * thread_stride_ * C8NUM * params_->deep_;
  auto cur_c = c_r8x8_ptr_ + task_id * thread_stride_ * C8NUM * params_->row_8_;
  MatMulInt8(a_c8_ptr_, cur_b, cur_c, params_->row_8_, cur_oc * 8, params_->deep_, quant_params_.input.zp_,
             quant_params_.weight.zp_);
  return RET_OK;
}

int MatmulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto op = reinterpret_cast<MatmulInt8CPUKernel *>(cdata);
  auto ret = op->RunImpl(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "MatmulInt8Run error task_id[" << task_id << "] error_code[" << ret << "]";
    return ret;
  }
  return RET_OK;
}

int MatmulInt8CPUKernel::Run() {
  auto a_ptr = reinterpret_cast<int8_t *>(inputs_[0]->Data());
  auto b_ptr = reinterpret_cast<int8_t *>(inputs_[1]->Data());
  auto c_ptr = reinterpret_cast<int8_t *>(outputs_[0]->Data());
  auto a_stride = params_->row_ * params_->deep_;
  auto b_stride = params_->deep_ * params_->col_;
  auto c_stride = params_->row_ * params_->col_;
  for (int i = 0; i < params_->batch; ++i) {
    auto cur_a_ptr = a_ptr + i * a_stride;
    auto cur_b_ptr = b_ptr + i * b_stride;
    auto cur_c_ptr = c_ptr + i * c_stride;
    if (params_->a_transpose_) {
      RowMajor2Row8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->deep_, params_->row_);
    } else {
      RowMajor2Col8MajorInt8(cur_a_ptr, a_c8_ptr_, params_->row_, params_->deep_);
    }
    if (params_->b_transpose_) {
      RowMajor2Col8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->col_, params_->deep_);
    } else {
      RowMajor2Row8MajorInt8(cur_b_ptr, b_r8_ptr_, params_->deep_, params_->col_);
    }
    LiteBackendParallelLaunch(MatmulInt8Run, this, thread_count_);
    auto &q = quant_params_;
    SimplePostFuncInt8(c_r8x8_ptr_, cur_c_ptr, params_->col_, params_->row_, params_->row_8_, q.quant_multiplier,
                       q.left_shift, q.right_shift, q.output.zp_);
  }
  return RET_OK;
}
}  // namespace mindspore::kernel
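Init() folds the three tensor scales into a single real multiplier before handing it to QuantizeRoundParameter, which is defined elsewhere in opclib and not shown in this diff; its exact decomposition is assumed here. The snippet below only works the arithmetic for the scales produced by the int8 unit test at the end of this commit (ranges of 40, 2, and 60 mapped onto 255 int8 steps), as a plausibility check.

#include <cstdio>

int main() {
  // Per-tensor scales as the int8 test computes them: range / 255.
  double input_scale = 40.0 / 255, weight_scale = 2.0 / 255, output_scale = 60.0 / 255;
  double real_multiplier = input_scale * weight_scale / output_scale;  // = 80 / 15300, about 0.00523
  std::printf("real_multiplier = %f\n", real_multiplier);
  // QuantizeRoundParameter is expected to express this as an integer multiplier
  // plus left/right shifts, so the int32 accumulator can be rescaled without
  // floating point before SimplePostFuncInt8 adds the output zero point.
  return 0;
}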
mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h (new file, mode 100644)

/* Copyright 2020 Huawei Technologies Co., Ltd (Apache 2.0 license header identical to matmul_base.cc above) */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_

#include <vector>
#include "include/context.h"
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
#include "src/runtime/kernel/arm/base/matmul_base.h"

using mindspore::lite::Context;

namespace mindspore::kernel {
class MatmulInt8CPUKernel : public MatmulBaseCPUKernel {
 public:
  MatmulInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
      : MatmulBaseCPUKernel(parameter, inputs, outputs, ctx) {}
  ~MatmulInt8CPUKernel() override;

  int Init() override;
  int ReSize() override;
  int Run() override;
  int RunImpl(int task_id);

 private:
  MatmulQuantArg quant_params_;
  int8_t *a_c8_ptr_;
  int8_t *b_r8_ptr_;
  int *c_r8x8_ptr_;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
mindspore/lite/src/runtime/kernel/arm/opclib/common_func.cc

@@ -236,3 +236,20 @@ void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane
   }
   return;
 }
+
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp) {
+  /* (int32_t)row8x8-major * multiplier => (int8_t)row-major */
+  for (int r = 0; r < plane; r++) {
+    for (int c = 0; c < oc; c++) {
+      int c8div = c / 8, c8mod = c % 8;
+      int src_index = c8div * plane8 * 8 + r * 8 + c8mod;
+      int dst_index = r * oc + c;
+      int32_t value = in[src_index];
+      value = MultiplyByQuantizedMultiplier(value, multiplier, left_shift, right_shift) + zp;
+      value = MSMIN(CHAR_MAX, value);
+      value = MSMAX(CHAR_MIN, value);
+      out[dst_index] = (int8_t)value;
+    }
+  }
+}
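To make the src_index arithmetic above concrete, the following self-contained check (not repo code) traces one element through the 8x8-tiled accumulator layout.

#include <cassert>

int main() {
  // oc = 10 output channels, plane8 = 8 (row count already rounded up to 8).
  int oc = 10, plane8 = 8;
  int r = 1, c = 9;  // second row, tenth channel
  int c8div = c / 8, c8mod = c % 8;         // channel tile 1, lane 1
  int src_index = c8div * plane8 * 8 + r * 8 + c8mod;  // position in the tiled int32 accumulator
  int dst_index = r * oc + c;                          // position in the row-major int8 output
  assert(src_index == 73 && dst_index == 19);
  return 0;
}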
mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h

@@ -33,6 +33,8 @@ void ReluFp32(float *data, int ele_num);
 void Relu6Fp32(float *data, int ele_num);
 void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
                   int32_t left_shift, int32_t right_shift, int32_t zp, int8_t mini, int8_t maxi);
+void SimplePostFuncInt8(const int *in, int8_t *out, int oc, int plane, int plane8, int32_t multiplier,
+                        int32_t left_shift, int32_t right_shift, int32_t zp);
 void IndirectGemmFp32_8x8(float *output, const float *input, const float *weight, const float *bias, size_t step,
                           size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
                           size_t relu6);
mindspore/lite/src/runtime/kernel/arm/opclib/fp32/matmul.cc

@@ -65,9 +65,7 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActType act_type,
         size_t bi = c8div * deep * 8 + d * 8 + c8mod;
         value = value + a[ai] * b[bi];
       }
-      if (bias != nullptr) {
-        value += bias[col];
-      }
+      if (bias != nullptr) value += bias[col];
       if (act_type == ActType_Relu6) value = MSMIN(6.0f, value);
       if (act_type != ActType_No) value = MSMAX(0.0f, value);
       c[ci] = value;
mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.cc

@@ -18,6 +18,17 @@
 #include <limits.h>
 #include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"

+void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
+  for (int r = 0; r < row; r++) {
+    int8_t *src = src_ptr + r * col;
+    for (int c = 0; c < col; c++) {
+      int cd8 = c / 8;
+      int cm8 = c % 8;
+      dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c];
+    }
+  }
+}
+
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
   for (int r = 0; r < row; r++) {
     int rd8 = r / 8;

@@ -26,7 +37,6 @@ void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col)
       dst_ptr[rd8 * col * 8 + c * 8 + rm8] = src_ptr[r * col + c];
     }
   }
-  return;
 }

 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,

@@ -46,5 +56,4 @@ void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
       c[ci] = value;
     }
   }
-  return;
 }
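A quick standalone demo of the RowMajor2Row8MajorInt8 mapping just added: packing a 2x3 matrix into one 8-wide tile leaves lanes 3..7 of each row untouched (the kernels memset their buffers first, so padding reads as zeros). The function body is copied verbatim from the diff so the demo runs on its own.

#include <cstdint>
#include <cstdio>

void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
  for (int r = 0; r < row; r++) {
    int8_t *src = src_ptr + r * col;
    for (int c = 0; c < col; c++) {
      int cd8 = c / 8;
      int cm8 = c % 8;
      dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c];
    }
  }
}

int main() {
  int8_t src[2 * 3] = {1, 2, 3, 4, 5, 6};
  int8_t dst[2 * 8] = {0};  // one tile: 8 lanes per row
  RowMajor2Row8MajorInt8(src, dst, 2, 3);
  // Prints: 1 2 3 0 0 0 0 0 4 5 6 0 0 0 0 0
  for (int i = 0; i < 16; ++i) std::printf("%d ", dst[i]);
  return 0;
}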
mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h

@@ -22,7 +22,7 @@
 void MatMulInt8(const int8_t *a, const int8_t *b, int32_t *c, const int row8, const int col8, const int deep,
                 const int32_t a_zp, const int32_t b_zp);
 void RowMajor2Row8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 void RowMajor2Col8MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);

 #endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_OPCLIB_INT8_MATMUL_H_
mindspore/lite/src/runtime/kernel/arm/opclib/matmul.h

@@ -29,6 +29,7 @@ struct MatMulParameter {
   int col_8_;
   int deep_;
   bool has_bias_;
+  int batch;
   bool a_transpose_; /* false : row-major */
   bool b_transpose_; /* true : col-major */
   ActType act_type_;
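For a concrete reading of these fields, here is a small sketch (not repo code) of the values MatmulCPUKernel::Init() earlier in this diff derives for the batched fp32 test case, A {3, 2, 8} times B {3, 3, 8} with b_transpose_ set.

#include <cassert>
#include <vector>

int main() {
  std::vector<int> x_shape = {3, 2, 8};  // input A
  std::vector<int> o_shape = {3, 2, 3};  // output C
  bool a_transpose = false;
  int batch = 1;
  for (size_t i = 0; i + 2 < x_shape.size(); ++i) batch *= x_shape[i];  // leading dims
  int row = o_shape[o_shape.size() - 2];
  int col = o_shape[o_shape.size() - 1];
  int deep = a_transpose ? x_shape[x_shape.size() - 2] : x_shape[x_shape.size() - 1];
  assert(batch == 3 && row == 2 && col == 3 && deep == 8);
  // row_8_ and col_8_ would both round up to 8 via UP_ROUND(x, 8).
  return 0;
}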
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h

@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include <limits.h>
 #include <limits>
 #include "src/runtime/kernel/arm/opclib/op_base.h"

 struct QuantArg {
   double scale_;

@@ -49,7 +50,7 @@ struct ConcatQuantArg {
   QuantArg out_quant_args_;
 };

-struct FcQuantArg {
+struct MatmulQuantArg {
   QuantArg input;
   QuantArg weight;
   QuantArg output;

@@ -130,4 +131,22 @@ inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32
   *mini = min;
   *maxi = max;
 }
+
+// quantize from float to int8
+inline void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
+  for (int i = 0; i < length; ++i) {
+    int r = (int)round(input_data[i] / scale + zero_point);
+    int8_t q = r > CHAR_MAX ? CHAR_MAX : r;
+    q = q < CHAR_MIN ? CHAR_MIN : q;
+    output_data[i] = q;
+  }
+}
+
+// dequantize from int8 to float
+inline void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
+  for (int i = 0; i < length; ++i) {
+    output_data[i] = scale * (input_data[i] - zero_point);
+  }
+}

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_QUANTIZATION_QUANTIZE_H_
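A round-trip usage sketch for the Quantize/Dequantize helpers added above, assuming this header is on the include path; with scale = 2/255 the reconstruction error per element stays within about half a quantization step, plus saturation at the int8 limits.

#include <cstdio>
#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"  // the header above

int main() {
  float in[4] = {-0.9f, -0.1f, 0.3f, 0.8f};
  int8_t q[4];
  float out[4];
  float scale = 2.0f / 255;  // covers roughly [-1, 1]
  int zero_point = 0;
  Quantize(in, 4, scale, zero_point, q);
  Dequantize(q, 4, scale, zero_point, out);
  for (int i = 0; i < 4; ++i) std::printf("%f -> %d -> %f\n", in[i], q[i], out[i]);
  return 0;
}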
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/matmul_fp32_tests.cc (new file, mode 100644)

/* Copyright 2020 Huawei Technologies Co., Ltd (Apache 2.0 license header identical to matmul_base.cc above) */

#include <iostream>
#include "mindspore/core/utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h"
#include "src/kernel_registry.h"
#include "src/lite_kernel.h"

namespace mindspore {
class TestMatMulFp32 : public mindspore::Common {
 public:
  TestMatMulFp32() {}
};

int MMTestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
               float *a_ptr, float *b_ptr, std::vector<int> a_shape, std::vector<int> b_shape,
               std::vector<int> c_shape) {
  auto in_t = new lite::tensor::Tensor(kNumberTypeFloat, a_shape, schema::Format_NHWC,
                                       static_cast<schema::NodeType>(1));
  in_t->MallocData();
  memcpy(in_t->Data(), a_ptr, sizeof(float) * in_t->ElementsNum());
  inputs_->push_back(in_t);
  auto weight_t = new lite::tensor::Tensor(kNumberTypeFloat, b_shape, schema::Format_NHWC,
                                           static_cast<schema::NodeType>(1));
  weight_t->MallocData();
  memcpy(weight_t->Data(), b_ptr, sizeof(float) * weight_t->ElementsNum());
  inputs_->push_back(weight_t);
  auto out_t = new lite::tensor::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC,
                                        static_cast<schema::NodeType>(1));
  out_t->MallocData();
  outputs_->push_back(out_t);
  return out_t->ElementsNum();
}

TEST_F(TestMatMulFp32, simple) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  matmul_param->a_transpose_ = false;
  matmul_param->b_transpose_ = false;
  matmul_param->has_bias_ = false;
  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793,  -6.1471,  -1.7680453, -6.5721383,
               17.87506,   -5.1192183, 10.742863,  1.4536934, 19.693445, 19.45783, 5.063163,   0.5234792};
  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671,  -0.012470592,   -0.0053063435,
               0.006050155,   0.008656233,  0.012911413,  -0.0028635843, -0.00034080597, -0.0010622552,
               -0.012254699,  -0.01312836,  0.0025241964, -0.004706142,  0.002451482,    -0.009558459,
               0.004481974,   0.0033251503, -0.011705584, -0.001720293,  -0.0039410214,  -0.0073637343};
  std::vector<int> a_shape = {1, 2, 8};
  std::vector<int> b_shape = {1, 8, 3};
  std::vector<int> c_shape = {1, 2, 3};
  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
  auto ctx = new lite::Context;
  ctx->threadNum = 2;
  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
  mm->Init();
  mm->Run();
  float correct[] = {-0.1256939023733139, -0.07744802534580231,  0.07410638779401779,
                     -0.3049793541431427, -0.027687929570674896, -0.18109679222106934};
  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
  delete matmul_param;
  delete mm;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
}

TEST_F(TestMatMulFp32, simple_transb) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  matmul_param->a_transpose_ = false;
  matmul_param->b_transpose_ = true;
  matmul_param->has_bias_ = false;
  float a[] = {-3.2366564, -4.7733846, -7.8329225, 16.146885, 5.060793,  -6.1471,  -1.7680453, -6.5721383,
               17.87506,   -5.1192183, 10.742863,  1.4536934, 19.693445, 19.45783, 5.063163,   0.5234792};
  float b[] = {-0.0024438887, 0.0006738146, -0.008169129, 0.0021510671,  -0.012470592,   -0.0053063435,
               0.006050155,   0.008656233,  0.012911413,  -0.0028635843, -0.00034080597, -0.0010622552,
               -0.012254699,  -0.01312836,  0.0025241964, -0.004706142,  0.002451482,    -0.009558459,
               0.004481974,   0.0033251503, -0.011705584, -0.001720293,  -0.0039410214,  -0.0073637343};
  std::vector<int> a_shape = {1, 2, 8};
  std::vector<int> b_shape = {1, 3, 8};
  std::vector<int> c_shape = {1, 2, 3};
  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
  auto ctx = new lite::Context;
  ctx->threadNum = 2;
  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
  mm->Init();
  mm->Run();
  float correct[] = {0.00533547,   0.002545945,  0.062974121,
                     -0.445441471, -0.246223617, -0.142070031};
  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
  delete matmul_param;
  delete mm;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
}

TEST_F(TestMatMulFp32, batch) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  matmul_param->a_transpose_ = false;
  matmul_param->b_transpose_ = true;
  matmul_param->has_bias_ = false;
  float a[] = {-4.946672525326248,  11.154420027909701,  -7.831129637356922,  17.309845099949953,
               -10.46177877610444,  2.5412751480833897,  2.700113860276929,   -12.616715572097341,
               -15.513316568881574, -9.513294738065516,  17.931148376418896,  -10.83801964632579,
               -14.023733862948017, -14.50805001403956,  0.7952221556310306,  6.619720423569035,
               -19.277904230909357, -13.450479287024839, 19.914652156692625,  16.542571697048878,
               -2.9715041389268926, 4.949555349889412,   -1.9408110276290103, -15.062828261031868,
               0.20012569643335,    8.260383531209776,   3.1092344458607357,  16.742272486091487,
               17.31277252415167,   -16.60303202099434,  -8.980314693173042,  -11.735087989358268,
               -14.918976184088514, -11.347592686892733, 11.808756029220604,  -18.76179414554809,
               7.579758962360987,   3.13240880962163,    6.528181981442103,   -16.802624652419794,
               -14.323146919914901, -16.197579076296144, 9.738053920125779,   -12.245780062949866,
               8.817905278096319,   0.5261391331275007,  -18.26152522535471,  -2.400461208771226};
  float b[] = {-0.895183867395529,    -0.8146900207660068,   -0.27931593219652817,  0.783554361201179,
               -0.05080215007779798,  -0.9879631271568501,   0.07710949009001333,   -0.9562579726211344,
               0.29505553318356825,   -0.26651960351085124,  -0.12755456259718279,  -0.8221417897250098,
               -0.5094334041431876,   -0.9117373380256013,   0.991501784215064,     0.20131976450979394,
               0.07889260559412059,   -0.8138407752750305,   -0.047622075866657454, -0.2778043115153188,
               -0.6269973420163957,   -0.44345812666611617,  -0.8571568605933642,   0.020192166011526735,
               0.4860054298402434,    0.41525925469513614,   -0.40270506445219967,  -0.8716538067535347,
               0.5276448387223114,    0.6064500154192936,    -0.9553204135772526,   0.3253219646257437,
               -0.7237956595774822,   0.3271284879679077,    -0.534543967339336,    -0.4076498484281894,
               0.01574797075171963,   -0.37322004720586244,  0.16425071396119928,   -0.5328652244800547,
               0.7389336170615435,    -0.6552069958923377,   -0.042305872596973604, -0.6714941466767734,
               -0.9281411415119043,   -0.7748558258281224,   -0.6209799945964443,   0.02526428593887675,
               -0.44984776800225856,  0.6281401952319337,    0.9907258228680276,    0.6288646615999687,
               -0.82076880150175,     0.3065944740797497,    -0.29201038744043584,  -0.025685501802048982,
               -0.07273175145419652,  0.9370449239208709,    -0.8233807408078093,   -0.4195634619023012,
               0.9799555630257346,    -0.23461882935715228,  -0.8884793313829993,   -0.4760267734754635,
               -0.2874539543614072,   -0.8795685985480997,   -0.08099698251915255,  -0.1626521023321741,
               -0.9337167240793414,   0.40924842916829207,   -0.7375713045221615,   -0.0065659291539015285};
  std::vector<int> a_shape = {3, 2, 8};
  std::vector<int> b_shape = {3, 3, 8};
  std::vector<int> c_shape = {3, 2, 3};
  int total_size = MMTestInit(&inputs_, &outputs_, a, b, a_shape, b_shape, c_shape);
  auto ctx = new lite::Context;
  ctx->threadNum = 1;
  auto mm = new kernel::MatmulCPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
  mm->Init();
  mm->Run();
  float correct[] = {21.38518524169922,   -14.514888763427734, -11.040614128112793, 16.91403579711914,
                     27.07421112060547,   23.35394287109375,   -39.006141662597656, -2.021998405456543,
                     -17.63555145263672,  -8.490625381469727,  5.317771911621094,   -14.561882019042969,
                     -7.251564025878906,  -2.508212089538574,  5.86458683013916,    -3.466249465942383,
                     8.869029998779297,   25.034008026123047};
  float *output = reinterpret_cast<float *>(outputs_[0]->Data());
  for (int i = 0; i < 18; ++i) printf("%f ", output[i]);
  CompareOutputData(reinterpret_cast<float *>(outputs_[0]->Data()), correct, total_size, 0.0001);
  delete matmul_param;
  delete mm;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
}
}  // namespace mindspore
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/fullconnection_int8_tests.cc

@@ -13,13 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #include <iostream>
 #include <memory>
 #include "utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/int8/matmul.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "mindspore/lite/src/lite_kernel.h"

@@ -30,21 +28,6 @@ class TestFcInt8 : public mindspore::Common {
   TestFcInt8() {}
 };

-void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
-  for (int i = 0; i < length; ++i) {
-    int8_t q = static_cast<int8_t>(std::max<float>(
-        std::numeric_limits<int8_t>::min(),
-        std::min<float>(std::numeric_limits<int8_t>::max(), std::round(zero_point + (input_data[i] / scale)))));
-    output_data[i] = q;
-  }
-}
-
-void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
-  for (int i = 0; i < length; ++i) {
-    output_data[i] = scale * (input_data[i] - zero_point);
-  }
-}
-
 int FcInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                    MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) {
   float input_max = 20;
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/matmul_int8_tests.cc (new file, mode 100644)

/* Copyright 2020 Huawei Technologies Co., Ltd (Apache 2.0 license header identical to matmul_base.cc above) */

#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/common_func.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/lite_kernel.h"

namespace mindspore {
class TestMatmulInt8 : public mindspore::Common {
 public:
  TestMatmulInt8() {}
};

int MMInt8TestInit(std::vector<lite::tensor::Tensor *> *inputs_, std::vector<lite::tensor::Tensor *> *outputs_,
                   MatMulParameter *matmal_param, float **correct, double *scale, int *zeropoint) {
  float input_max = 20;
  float input_min = -20;
  float weight_max = 1;
  float weight_min = -1;
  float output_max = 30;
  float output_min = -30;

  double input_scale =
      (input_max - input_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
  int input_zp = std::numeric_limits<int8_t>::max() - input_max / input_scale;
  double weight_scale =
      (weight_max - weight_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
  int weight_zp = std::numeric_limits<int8_t>::max() - weight_max / weight_scale;
  double output_scale =
      (output_max - output_min) / (std::numeric_limits<int8_t>::max() - std::numeric_limits<int8_t>::min());
  int output_zp = std::numeric_limits<int8_t>::max() - output_max / output_scale;
  *scale = output_scale;
  *zeropoint = output_zp;

  auto in_t =
      new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  in_t->MallocData();
  float in[] = {6.583835634764597,   11.337275140963907,  -4.125256949459629, 10.994337291530833,
                19.086065139532636,  3.620842999158455,   13.167624585590346, -18.326739299407755,
                14.877693740734841,  -17.092677920571653, 19.24147072807235,  -15.14805323833401,
                -18.075654829688737, -0.9164404591894204, -3.836646280336332, -10.870298671273918};
  Quantize(in, in_t->ElementsNum(), input_scale, input_zp, reinterpret_cast<int8_t *>(in_t->Data()));
  auto in_quant_arg = new mindspore::lite::tensor::QuantArg();
  in_quant_arg->zeroPoint = input_zp;
  in_quant_arg->scale = input_scale;
  in_t->AddQuantParam(*in_quant_arg);
  inputs_->push_back(in_t);

  auto weight_t =
      new lite::tensor::Tensor(kNumberTypeInt8, {1, 3, 8}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  weight_t->MallocData();
  float weight[] = {0.3651070698591563,    -0.5856943921727129,  -0.7472032663840145, 0.9489992871641959,
                    -0.8179490270358738,   -0.873058811259344,   0.39876672713807215, -0.1816769383004213,
                    -0.13584645926733696,  -0.7614673836659709,  -0.2535825872616164, -0.05265760030895916,
                    0.28558728305658754,   0.15404213943520118,  -0.1634824450738006, -0.5068199082730189,
                    -0.026961256849111326, -0.1508441942453307,  0.9375335677537737,  0.3304690744194263,
                    -0.5091563780251127,   0.029887336278646925, -0.39540496207319276, 0.46094065001445084};
  Quantize(weight, weight_t->ElementsNum(), weight_scale, weight_zp, reinterpret_cast<int8_t *>(weight_t->Data()));
  auto weight_quant_arg = new mindspore::lite::tensor::QuantArg();
  weight_quant_arg->zeroPoint = weight_zp;
  weight_quant_arg->scale = weight_scale;
  weight_t->AddQuantParam(*weight_quant_arg);
  inputs_->push_back(weight_t);

  auto out_t =
      new lite::tensor::Tensor(kNumberTypeInt8, {1, 2, 3}, schema::Format_NHWC, static_cast<schema::NodeType>(1));
  out_t->MallocData();
  auto output_quant_arg = new mindspore::lite::tensor::QuantArg();
  output_quant_arg->zeroPoint = output_zp;
  output_quant_arg->scale = output_scale;
  out_t->AddQuantParam(*output_quant_arg);
  outputs_->push_back(out_t);

  *correct = reinterpret_cast<float *>(malloc(out_t->ElementsNum() * sizeof(float)));
  float nchw_co[] = {-0.912632942, 4.08398056, -25.385608673, 2.720281124, 7.745952606, 20.893184662};
  memcpy(*correct, nchw_co, out_t->ElementsNum() * sizeof(float));

  matmal_param->b_transpose_ = true;
  matmal_param->a_transpose_ = false;
  matmal_param->has_bias_ = false;
  return out_t->ElementsNum();
}

TEST_F(TestMatmulInt8, mmint8) {
  std::vector<lite::tensor::Tensor *> inputs_;
  std::vector<lite::tensor::Tensor *> outputs_;
  auto matmul_param = new MatMulParameter();
  float *correct;
  double output_scale;
  int output_zp;
  int total_size = MMInt8TestInit(&inputs_, &outputs_, matmul_param, &correct, &output_scale, &output_zp);

  auto ctx = new lite::Context;
  ctx->threadNum = 2;
  kernel::MatmulInt8CPUKernel *mm =
      new kernel::MatmulInt8CPUKernel(reinterpret_cast<OpParameter *>(matmul_param), inputs_, outputs_, ctx);
  mm->Init();
  mm->Run();
  float fout[6] = {0};
  Dequantize(reinterpret_cast<int8_t *>(outputs_[0]->Data()), outputs_[0]->ElementsNum(), output_scale, output_zp,
             fout);
  CompareOutputData(fout, correct, 6, 0.3);
  delete matmul_param;
  delete mm;
  for (auto t : inputs_) delete t;
  for (auto t : outputs_) delete t;
  free(correct);
}
}  // namespace mindspore