Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
eaa28ac4
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
eaa28ac4
编写于
8月 06, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 06, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4008 Add new hms ops of split and eltwise with type of int8
Merge pull request !4008 from liuwenhao/master
上级
dd68fb16
2ccb34d4
变更
19
展开全部
隐藏空白更改
内联
并排
Showing
19 changed file
with
1691 addition
and
141 deletion
+1691
-141
mindspore/lite/src/populate_parameter.cc
mindspore/lite/src/populate_parameter.cc
+1
-1
mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
+136
-0
mindspore/lite/src/runtime/kernel/arm/base/split_base.h
mindspore/lite/src/runtime/kernel/arm/base/split_base.h
+50
-0
mindspore/lite/src/runtime/kernel/arm/fp32/split.cc
mindspore/lite/src/runtime/kernel/arm/fp32/split.cc
+4
-55
mindspore/lite/src/runtime/kernel/arm/fp32/split.h
mindspore/lite/src/runtime/kernel/arm/fp32/split.h
+3
-9
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
.../lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
+27
-7
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
...e/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
+34
-2
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
+92
-0
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h
+47
-0
mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.cc
...rc/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.cc
+242
-56
mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.h
...src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.h
+17
-1
mindspore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.cc
...ore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.cc
+73
-0
mindspore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.h
...pore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.h
+25
-0
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
...ite/src/runtime/kernel/arm/opclib/quantization/quantize.h
+10
-0
mindspore/lite/src/runtime/kernel/arm/opclib/split.cc
mindspore/lite/src/runtime/kernel/arm/opclib/split.cc
+1
-0
mindspore/lite/src/runtime/kernel/arm/opclib/split.h
mindspore/lite/src/runtime/kernel/arm/opclib/split.h
+1
-10
mindspore/lite/src/runtime/kernel/arm/opclib/split_parameter.h
...pore/lite/src/runtime/kernel/arm/opclib/split_parameter.h
+33
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
...src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
+590
-0
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
...e/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
+305
-0
未找到文件。
mindspore/lite/src/populate_parameter.cc
浏览文件 @
eaa28ac4
...
...
@@ -47,7 +47,7 @@
#include "src/runtime/kernel/arm/opclib/pad_parameter.h"
#include "src/runtime/kernel/arm/opclib/fp32/fill.h"
#include "src/runtime/kernel/arm/opclib/transpose.h"
#include "src/runtime/kernel/arm/opclib/split.h"
#include "src/runtime/kernel/arm/opclib/split
_parameter
.h"
#include "src/runtime/kernel/arm/opclib/squeeze.h"
#include "src/runtime/kernel/arm/opclib/fp32/gather.h"
#include "src/runtime/kernel/arm/opclib/fp32/reverse.h"
...
...
mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/base/split_base.h"
#include <vector>
#include "src/runtime/kernel/arm/int8/split_int8.h"
#include "src/runtime/kernel/arm/fp32/split.h"
#include "schema/model_generated.h"
#include "src/kernel_factory.h"
#include "include/errorcode.h"
#include "include/context.h"
using
mindspore
::
lite
::
KernelRegistrar
;
using
mindspore
::
lite
::
RET_ERROR
;
using
mindspore
::
lite
::
RET_OK
;
using
mindspore
::
schema
::
PrimitiveType_Split
;
namespace
mindspore
::
kernel
{
int
SplitBaseCPUKernel
::
Init
()
{
auto
in_tensor
=
inputs_
.
front
();
auto
input_shape
=
in_tensor
->
shape
();
param
->
strides_
[
input_shape
.
size
()
-
1
]
=
1
;
for
(
int
i
=
input_shape
.
size
()
-
2
;
i
>=
0
;
i
--
)
{
param
->
strides_
[
i
]
=
param
->
strides_
[
i
+
1
]
*
input_shape
[
i
+
1
];
}
param
->
split_count_
=
param
->
strides_
[
0
]
*
input_shape
[
0
]
/
(
input_shape
[
param
->
split_dim_
]
*
param
->
strides_
[
param
->
split_dim_
]);
param
->
n_dims_
=
input_shape
.
size
();
if
(
param
->
split_sizes_
[
0
]
==
0
)
{
if
(
input_shape
[
param
->
split_dim_
]
%
param
->
num_split_
!=
0
)
{
MS_LOG
(
ERROR
)
<<
"Default split size is not usable."
;
return
RET_ERROR
;
}
int
split_size
=
input_shape
[
param
->
split_dim_
]
/
param
->
num_split_
;
for
(
int
i
=
0
;
i
<
param
->
num_split_
;
i
++
)
{
param
->
split_sizes_
[
i
]
=
split_size
;
}
}
num_unit_
=
param
->
split_count_
*
param
->
num_split_
;
thread_n_num_
=
MSMIN
(
thread_count_
,
num_unit_
);
thread_n_stride_
=
UP_DIV
(
num_unit_
,
thread_n_num_
);
return
RET_OK
;
}
kernel
::
LiteKernel
*
CpuSplitInt8KernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
)
{
if
(
opParameter
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Input opParameter is nullptr!"
;
return
nullptr
;
}
MS_ASSERT
(
desc
.
type
==
schema
::
PrimitiveType_Split
);
auto
*
kernel
=
new
(
std
::
nothrow
)
SplitInt8CPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new SplitCPUKernel fail!"
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
kernel
;
MS_LOG
(
ERROR
)
<<
"Init kernel failed, name: "
<<
opParameter
->
name_
<<
", type: "
<<
schema
::
EnumNamePrimitiveType
(
static_cast
<
schema
::
PrimitiveType
>
(
opParameter
->
type_
));
return
nullptr
;
}
return
kernel
;
}
kernel
::
LiteKernel
*
CpuSplitInt32KernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
)
{
if
(
opParameter
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Input opParameter is nullptr!"
;
return
nullptr
;
}
MS_ASSERT
(
desc
.
type
==
schema
::
PrimitiveType_Split
);
auto
*
kernel
=
new
(
std
::
nothrow
)
SplitCPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new SplitCPUKernel fail!"
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
kernel
;
MS_LOG
(
ERROR
)
<<
"Init kernel failed, name: "
<<
opParameter
->
name_
<<
", type: "
<<
schema
::
EnumNamePrimitiveType
(
static_cast
<
schema
::
PrimitiveType
>
(
opParameter
->
type_
));
return
nullptr
;
}
return
kernel
;
}
kernel
::
LiteKernel
*
CpuSplitFp32KernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
)
{
if
(
opParameter
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Input opParameter is nullptr!"
;
return
nullptr
;
}
MS_ASSERT
(
desc
.
type
==
schema
::
PrimitiveType_Split
);
auto
*
kernel
=
new
(
std
::
nothrow
)
SplitCPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"new SplitCPUKernel fail!"
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
delete
kernel
;
MS_LOG
(
ERROR
)
<<
"Init kernel failed, name: "
<<
opParameter
->
name_
<<
", type: "
<<
schema
::
EnumNamePrimitiveType
(
static_cast
<
schema
::
PrimitiveType
>
(
opParameter
->
type_
));
return
nullptr
;
}
return
kernel
;
}
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Split
,
CpuSplitInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt32
,
PrimitiveType_Split
,
CpuSplitInt32KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeFloat32
,
PrimitiveType_Split
,
CpuSplitFp32KernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/base/split_base.h
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SPLIT_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SPLIT_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
using
mindspore
::
lite
::
Context
;
namespace
mindspore
::
kernel
{
class
SplitBaseCPUKernel
:
public
LiteKernel
{
public:
SplitBaseCPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
)
:
LiteKernel
(
parameter
,
inputs
,
outputs
),
ctx_
(
ctx
),
thread_count_
(
ctx
->
thread_num_
)
{
param
=
reinterpret_cast
<
SplitParameter
*>
(
opParameter
);
}
~
SplitBaseCPUKernel
()
=
default
;
int
Init
()
override
;
int
ReSize
()
override
{
return
0
;
}
int
Run
()
override
{
return
0
;
}
protected:
int
thread_count_
;
const
Context
*
ctx_
;
int
thread_n_stride_
;
int
thread_n_num_
;
int
num_unit_
;
SplitParameter
*
param
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SPLIT_BASE_H_
mindspore/lite/src/runtime/kernel/arm/fp32/split.cc
浏览文件 @
eaa28ac4
...
...
@@ -14,11 +14,10 @@
* limitations under the License.
*/
#include <string.h>
#include <vector>
#include "src/runtime/kernel/arm/fp32/split.h"
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/runtime/kernel/arm/opclib/split.h"
#include "s
chema/model_generated
.h"
#include "s
rc/runtime/kernel/arm/opclib/split_parameter
.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
...
...
@@ -32,38 +31,12 @@ using mindspore::schema::PrimitiveType_Split;
namespace
mindspore
::
kernel
{
int
SplitCPUKernel
::
Init
()
{
SplitBaseCPUKernel
::
Init
();
auto
in_tensor
=
inputs_
.
front
();
input_ptr_
=
reinterpret_cast
<
float
*>
(
in_tensor
->
Data
());
auto
input_shape
=
in_tensor
->
shape
();
auto
param
=
reinterpret_cast
<
SplitParameter
*>
(
opParameter
);
param
->
strides_
[
input_shape
.
size
()
-
1
]
=
1
;
for
(
int
i
=
input_shape
.
size
()
-
2
;
i
>=
0
;
i
--
)
{
param
->
strides_
[
i
]
=
param
->
strides_
[
i
+
1
]
*
input_shape
[
i
+
1
];
}
param
->
split_count_
=
param
->
strides_
[
0
]
*
input_shape
[
0
]
/
(
input_shape
[
param
->
split_dim_
]
*
param
->
strides_
[
param
->
split_dim_
]);
for
(
int
i
=
0
;
i
<
param
->
num_split_
;
i
++
)
{
output_ptr_
.
push_back
(
reinterpret_cast
<
float
*>
(
outputs_
.
at
(
i
)
->
Data
()));
}
param
->
n_dims_
=
input_shape
.
size
();
if
(
param
->
split_sizes_
[
0
]
==
0
)
{
if
(
input_shape
[
param
->
split_dim_
]
%
param
->
num_split_
!=
0
)
{
MS_LOG
(
ERROR
)
<<
"Default split size is not usable."
;
return
RET_ERROR
;
}
int
split_size
=
input_shape
[
param
->
split_dim_
]
/
param
->
num_split_
;
for
(
int
i
=
0
;
i
<
param
->
num_split_
;
i
++
)
{
param
->
split_sizes_
[
i
]
=
split_size
;
}
}
num_unit_
=
param
->
split_count_
*
param
->
num_split_
;
unit_size_
=
param
->
strides_
[
param
->
split_dim_
];
thread_n_num_
=
MSMIN
(
thread_num_
,
num_unit_
);
thread_n_stride_
=
UP_DIV
(
num_unit_
,
thread_n_num_
);
return
RET_OK
;
}
...
...
@@ -76,7 +49,7 @@ int SplitCPUKernel::Split(int task_id) {
}
int
thread_offset
=
task_id
*
thread_n_stride_
;
auto
ret
=
DoSplit
(
input_ptr_
,
output_ptr_
.
data
(),
inputs_
.
front
()
->
shape
().
data
(),
thread_offset
,
num_unit_thread
,
reinterpret_cast
<
SplitParameter
*>
(
opParameter
)
);
param
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Split error task_id["
<<
task_id
<<
"] error_code["
<<
ret
<<
"]"
;
return
RET_ERROR
;
...
...
@@ -103,28 +76,4 @@ int SplitCPUKernel::Run() {
return
RET_OK
;
}
kernel
::
LiteKernel
*
CpuSplitFp32KernelCreator
(
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
OpParameter
*
opParameter
,
const
lite
::
Context
*
ctx
,
const
kernel
::
KernelKey
&
desc
)
{
MS_ASSERT
(
opParameter
!=
nullptr
);
MS_ASSERT
(
desc
.
type
==
schema
::
PrimitiveType_Split
);
auto
*
kernel
=
new
(
std
::
nothrow
)
SplitCPUKernel
(
opParameter
,
inputs
,
outputs
,
ctx
);
if
(
kernel
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"New kernel fails."
;
return
nullptr
;
}
auto
ret
=
kernel
->
Init
();
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Init kernel failed, name: "
<<
opParameter
->
name_
<<
", type: "
<<
schema
::
EnumNamePrimitiveType
(
static_cast
<
schema
::
PrimitiveType
>
(
opParameter
->
type_
));
delete
kernel
;
return
nullptr
;
}
return
kernel
;
}
REG_KERNEL
(
kCPU
,
kNumberTypeFloat32
,
PrimitiveType_Split
,
CpuSplitFp32KernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/fp32/split.h
浏览文件 @
eaa28ac4
...
...
@@ -18,15 +18,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPLIT_H_
#include <vector>
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/lite_kernel.h"
namespace
mindspore
::
kernel
{
class
SplitCPUKernel
:
public
Lite
Kernel
{
class
SplitCPUKernel
:
public
SplitBaseCPU
Kernel
{
public:
SplitCPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
lite
::
Context
*
ctx
)
:
LiteKernel
(
parameter
,
inputs
,
outputs
),
thread_num_
(
ctx
->
thread_num_
)
{}
:
SplitBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{}
~
SplitCPUKernel
()
override
=
default
;
int
Init
()
override
;
...
...
@@ -35,15 +35,9 @@ class SplitCPUKernel : public LiteKernel {
int
Split
(
int
task_id
);
private:
int
thread_num_
;
int
thread_n_stride_
;
int
thread_n_num_
;
int
num_unit_
;
int
unit_size_
;
float
*
input_ptr_
;
std
::
vector
<
float
*>
output_ptr_
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPLIT_H_
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc
浏览文件 @
eaa28ac4
...
...
@@ -31,16 +31,28 @@ int ArithmeticSelfInt8CPUKernel::Init() {
int
ret
=
ReSize
();
auto
*
input_tensor
=
inputs_
.
at
(
kInputIndex
);
auto
in_quant_args
=
input_tensor
->
GetQuantParams
();
arithmeticSelfParameter
_
->
quant_arg_
.
in_args_
.
scale_
=
in_quant_args
.
front
().
scale
;
arithmeticSelfParameter_
->
quant_arg_
.
in_args_
.
zp_
=
in_quant_args
.
front
().
zeroPoint
;
para
_
->
quant_arg_
.
in_args_
.
scale_
=
in_quant_args
.
front
().
scale
;
para_
->
quant_arg_
.
in_args_
.
zp_
=
in_quant_args
.
front
().
zeroPoint
*
(
-
1
)
;
auto
*
out_tensor
=
outputs_
.
at
(
kOutputIndex
);
auto
out_quant_args
=
out_tensor
->
GetQuantParams
();
arithmeticSelfParameter_
->
quant_arg_
.
out_args_
.
scale_
=
out_quant_args
.
front
().
scale
;
arithmeticSelfParameter_
->
quant_arg_
.
out_args_
.
zp_
=
out_quant_args
.
front
().
zeroPoint
;
para_
->
quant_arg_
.
out_args_
.
scale_
=
out_quant_args
.
front
().
scale
;
para_
->
quant_arg_
.
out_args_
.
zp_
=
out_quant_args
.
front
().
zeroPoint
;
para_
->
quant_arg_
.
output_activation_max_
=
std
::
numeric_limits
<
int8_t
>::
max
();
para_
->
quant_arg_
.
output_activation_min_
=
std
::
numeric_limits
<
int8_t
>::
min
();
if
(
para_
->
op_parameter_
.
type_
==
PrimitiveType_Square
)
{
const
double
real_multiplier
=
(
para_
->
quant_arg_
.
in_args_
.
scale_
*
para_
->
quant_arg_
.
in_args_
.
scale_
)
/
para_
->
quant_arg_
.
out_args_
.
scale_
;
int
right_shift
=
0
;
QuantizeMultiplierSmallerThanOne
(
real_multiplier
,
&
para_
->
quant_arg_
.
output_multiplier_
,
&
right_shift
);
para_
->
quant_arg_
.
shift_left_
=
right_shift
<
0
?
-
right_shift
:
0
;
para_
->
quant_arg_
.
shift_right_
=
right_shift
>
0
?
right_shift
:
0
;
}
arithmeticSelfParameter_
->
quant_arg_
.
output_activation_max_
=
std
::
numeric_limits
<
int8_t
>::
max
();
arithmeticSelfParameter_
->
quant_arg_
.
output_activation_min_
=
std
::
numeric_limits
<
int8_t
>::
min
();
return
ret
;
}
...
...
@@ -68,7 +80,7 @@ int ArithmeticSelfInt8CPUKernel::DoArithmeticSelf(int task_id) {
}
int
offset
=
task_id
*
thread_sz_stride_
;
if
(
arithmeticSelf_run_
)
{
auto
ret
=
arithmeticSelf_run_
(
in_ptr_
+
offset
,
out_ptr_
+
offset
,
size
,
arithmeticSelfParameter
_
->
quant_arg_
);
auto
ret
=
arithmeticSelf_run_
(
in_ptr_
+
offset
,
out_ptr_
+
offset
,
size
,
para
_
->
quant_arg_
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Run failed, illegal input! "
;
return
ret
;
...
...
@@ -117,4 +129,12 @@ kernel::LiteKernel *CpuArithmeticSelfInt8KernelCreator(const std::vector<lite::t
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Round
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Floor
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Ceil
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Abs
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Sin
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Cos
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Log
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Sqrt
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Rsqrt
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_Square
,
CpuArithmeticSelfInt8KernelCreator
)
REG_KERNEL
(
kCPU
,
kNumberTypeInt8
,
PrimitiveType_LogicalNot
,
CpuArithmeticSelfInt8KernelCreator
)
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
浏览文件 @
eaa28ac4
...
...
@@ -29,6 +29,14 @@ using mindspore::lite::Context;
using
mindspore
::
schema
::
PrimitiveType_Round
;
using
mindspore
::
schema
::
PrimitiveType_Floor
;
using
mindspore
::
schema
::
PrimitiveType_Ceil
;
using
mindspore
::
schema
::
PrimitiveType_Abs
;
using
mindspore
::
schema
::
PrimitiveType_Sin
;
using
mindspore
::
schema
::
PrimitiveType_Cos
;
using
mindspore
::
schema
::
PrimitiveType_Log
;
using
mindspore
::
schema
::
PrimitiveType_Sqrt
;
using
mindspore
::
schema
::
PrimitiveType_Rsqrt
;
using
mindspore
::
schema
::
PrimitiveType_Square
;
using
mindspore
::
schema
::
PrimitiveType_LogicalNot
;
namespace
mindspore
::
kernel
{
class
ArithmeticSelfInt8CPUKernel
:
public
LiteKernel
{
...
...
@@ -48,10 +56,34 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
case
PrimitiveType_Ceil
:
arithmeticSelf_run_
=
ElementCeil
;
break
;
case
PrimitiveType_Abs
:
arithmeticSelf_run_
=
ElementAbs
;
break
;
case
PrimitiveType_Sin
:
arithmeticSelf_run_
=
ElementSin
;
break
;
case
PrimitiveType_Cos
:
arithmeticSelf_run_
=
ElementCos
;
break
;
case
PrimitiveType_Log
:
arithmeticSelf_run_
=
ElementLog
;
break
;
case
PrimitiveType_Sqrt
:
arithmeticSelf_run_
=
ElementSqrt
;
break
;
case
PrimitiveType_Rsqrt
:
arithmeticSelf_run_
=
ElementRsqrt
;
break
;
case
PrimitiveType_Square
:
arithmeticSelf_run_
=
ElementSquare
;
break
;
case
PrimitiveType_LogicalNot
:
arithmeticSelf_run_
=
ElementLogicalNot
;
break
;
default:
break
;
}
arithmeticSelfParameter
_
=
reinterpret_cast
<
ArithmeticSelfParameter
*>
(
parameter
);
para
_
=
reinterpret_cast
<
ArithmeticSelfParameter
*>
(
parameter
);
}
~
ArithmeticSelfInt8CPUKernel
()
override
=
default
;
...
...
@@ -65,7 +97,7 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
int
thread_sz_count_
;
int
thread_sz_stride_
;
size_t
data_size_
;
ArithmeticSelfParameter
*
arithmeticSelfParameter
_
;
ArithmeticSelfParameter
*
para
_
;
ArithmeticSelfInt8Run
arithmeticSelf_run_
;
const
Context
*
ctx_
;
int8_t
*
in_ptr_
;
...
...
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/split_int8.h"
#include <limits>
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
#include "src/runtime/kernel/arm/opclib/int8/split_int8.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
using
mindspore
::
kernel
::
KERNEL_ARCH
::
kCPU
;
using
mindspore
::
lite
::
RET_ERROR
;
using
mindspore
::
lite
::
RET_OK
;
namespace
mindspore
::
kernel
{
int
SplitInt8CPUKernel
::
Init
()
{
SplitBaseCPUKernel
::
Init
();
auto
in_tensor
=
inputs_
.
at
(
kInputIndex
);
input_ptr_
=
reinterpret_cast
<
int8_t
*>
(
in_tensor
->
Data
());
for
(
int
i
=
0
;
i
<
param
->
num_split_
;
i
++
)
{
output_ptr_
.
push_back
(
reinterpret_cast
<
int8_t
*>
(
outputs_
.
at
(
i
)
->
Data
()));
}
auto
in_quant_args
=
in_tensor
->
GetQuantParams
();
param
->
quant_arg_
.
in_args_
.
scale_
=
in_quant_args
.
front
().
scale
;
param
->
quant_arg_
.
in_args_
.
zp_
=
in_quant_args
.
front
().
zeroPoint
;
MS_ASSERT
(
param
->
num_split_
==
outputs_
.
size
());
for
(
int
i
=
0
;
i
<
param
->
num_split_
;
i
++
)
{
auto
*
out_tensor
=
outputs_
.
at
(
i
);
auto
out_quant_args
=
out_tensor
->
GetQuantParams
();
param
->
quant_arg_
.
out_args_
[
i
].
scale_
=
out_quant_args
.
front
().
scale
;
param
->
quant_arg_
.
out_args_
[
i
].
zp_
=
out_quant_args
.
front
().
zeroPoint
;
}
param
->
quant_arg_
.
output_activation_max_
=
std
::
numeric_limits
<
int8_t
>::
max
();
param
->
quant_arg_
.
output_activation_min_
=
std
::
numeric_limits
<
int8_t
>::
min
();
return
RET_OK
;
}
int
SplitInt8CPUKernel
::
ReSize
()
{
return
RET_OK
;
}
int
SplitInt8CPUKernel
::
Split
(
int
task_id
)
{
int
num_unit_thread
=
MSMIN
(
thread_n_stride_
,
num_unit_
-
task_id
*
thread_n_stride_
);
if
(
num_unit_thread
<=
0
)
{
return
RET_OK
;
}
int
thread_offset
=
task_id
*
thread_n_stride_
;
auto
ret
=
DoSplit
(
input_ptr_
,
output_ptr_
.
data
(),
inputs_
.
front
()
->
shape
().
data
(),
thread_offset
,
num_unit_thread
,
param
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Split error task_id["
<<
task_id
<<
"] error_code["
<<
ret
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
int
SplitInt8Run
(
int
task_id
,
LiteParallelGroupEnv
*
penv
,
void
*
cdata
)
{
auto
g_kernel
=
reinterpret_cast
<
SplitInt8CPUKernel
*>
(
cdata
);
auto
ret
=
g_kernel
->
Split
(
task_id
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"SplitRun error task_id["
<<
task_id
<<
"] error_code["
<<
ret
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
int
SplitInt8CPUKernel
::
Run
()
{
int
ret
=
LiteBackendParallelLaunch
(
SplitInt8Run
,
this
,
thread_n_num_
);
if
(
ret
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"Scale error error_code["
<<
ret
<<
"]"
;
return
RET_ERROR
;
}
return
RET_OK
;
}
}
// namespace mindspore::kernel
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.h
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPLIT_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPLIT_INT8_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/split_base.h"
#include "src/runtime/runtime_api.h"
using
mindspore
::
lite
::
Context
;
namespace
mindspore
::
kernel
{
class
SplitInt8CPUKernel
:
public
SplitBaseCPUKernel
{
public:
SplitInt8CPUKernel
(
OpParameter
*
parameter
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
inputs
,
const
std
::
vector
<
lite
::
tensor
::
Tensor
*>
&
outputs
,
const
Context
*
ctx
)
:
SplitBaseCPUKernel
(
parameter
,
inputs
,
outputs
,
ctx
)
{}
~
SplitInt8CPUKernel
()
=
default
;
int
Init
()
override
;
int
ReSize
()
override
;
int
Run
()
override
;
int
Split
(
int
tId
);
private:
int8_t
*
input_ptr_
;
std
::
vector
<
int8_t
*>
output_ptr_
;
};
}
// namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPLIT_INT8_H_
mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.cc
浏览文件 @
eaa28ac4
...
...
@@ -16,77 +16,263 @@
#include <math.h>
#include "src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.h"
#ifdef ENABLE_NEON
#include <arm_neon.h>
#include "src/runtime/kernel/arm/opclib/add_int8.h"
#endif
#include "src/runtime/kernel/arm/opclib/quantization/fixed_point.h"
int
ElementFloor
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
if
(
para
.
in_args_
.
scale_
==
para
.
out_args_
.
scale_
&&
para
.
in_args_
.
zp_
==
para
.
out_args_
.
zp_
)
{
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
output
[
i
]
=
floorf
(
input
[
i
]);
}
}
else
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
-
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
floorf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
floorf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementRound
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
if
(
para
.
in_args_
.
scale_
==
para
.
out_args_
.
scale_
&&
para
.
in_args_
.
zp_
==
para
.
out_args_
.
zp_
)
{
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
output
[
i
]
=
round
(
input
[
i
]);
}
}
else
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
-
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
round
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
round
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementCeil
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
if
(
para
.
in_args_
.
scale_
==
para
.
out_args_
.
scale_
&&
para
.
in_args_
.
zp_
==
para
.
out_args_
.
zp_
)
{
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
output
[
i
]
=
ceil
(
input
[
i
]);
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
ceil
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementAbs
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
fabsf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementSin
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
sinf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementCos
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
cosf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementLog
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
logf
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementSqrt
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
float
input_f32
=
input
[
i
]
*
in_scale
+
bias
;
if
(
input_f32
<
0
)
{
return
OPCLIB_ERRCODE_SQRT_NEGATIVE
;
}
}
else
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
-
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(
ceil
(
input
[
i
]
*
in_scale
+
bias
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
int32_t
output_tmp
=
round
(
sqrtf
(
input_f32
)
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
int
ElementRsqrt
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
float
input_f32
=
input
[
i
]
*
in_scale
+
bias
;
if
(
input_f32
<=
0
)
{
return
OPCLIB_ERRCODE_RSQRT_NEGATIVE_OR_ZERO
;
}
int32_t
output_tmp
=
round
(
1.
f
/
(
sqrtf
(
input_f32
)
*
out_scale
))
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
}
#ifdef ENABLE_NEON
// Squares four 32-bit lanes (already zero-point-adjusted by the caller) and
// requantizes them to the output scale using the fixed-point
// multiplier/shift pair carried in |para|, then adds the output zero-point
// and clamps to the activation range. Returns the lanes narrowed to 16 bits
// with saturation.
int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec, int32x4_t output_multiplier_vec,
                          ArithSelfQuantArg para) {
  // Element-wise square of the input lanes.
  int32x4_t input_scale = vmulq_s32(scaled_input, scaled_input);
  // Fixed-point rescale: (x * (1 << shift_left_)) high-multiplied by the
  // output multiplier, then rounding-shifted right by shift_right_
  // (gemmlowp-style requantization).
  int32x4_t raw_sum = RoundingDivideByPOTInt32x4(
    SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec),
    para.shift_right_);
  // Shift into the output quantized domain, then clamp to the activation range.
  raw_sum = vaddq_s32(raw_sum, vdupq_n_s32(para.out_args_.zp_));
  raw_sum = vmaxq_s32(raw_sum, vdupq_n_s32(para.output_activation_min_));
  raw_sum = vminq_s32(raw_sum, vdupq_n_s32(para.output_activation_max_));
  return vqmovn_s32(raw_sum);
}
// Vectorized int8 square: processes 8 elements per iteration with NEON and
// advances *index past everything it handled; the scalar tail loop in
// ElementSquare finishes the remainder. Results are requantized with the
// multiplier/shift pair in |para| by ClacSumHalfWord.
void SquareInt8NEON(int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para,
                    int *index) {
  int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_);
  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_);
  for (; (*index) <= element_size - 8; (*index) += 8) {
    // Load 8 int8 values at the current position and add the input
    // zero-point, widening to 16 bits.
    int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_);
    int32x4_t input_low = vmovl_s16(vget_low_s16(input_val));
    int32x4_t input_high = vmovl_s16(vget_high_s16(input_val));
    int16x4_t sum_low = ClacSumHalfWord(input_low, left_shift_out_vec, output_multiplier_vec, para);
    int16x4_t sum_high = ClacSumHalfWord(input_high, left_shift_out_vec, output_multiplier_vec, para);
    int16x8_t res_s16 = vcombine_s16(sum_low, sum_high);
    int8x8_t res_u8_n0 = vqmovn_s16(res_s16);
    // Bug fix: store at the current position. The original wrote to
    // output_data with no offset, overwriting output_data[0..7] on every
    // iteration and leaving the rest of the vectorized range untouched.
    vst1_s8(output_data + *index, res_u8_n0);
  }
}
#endif
// Element-wise square on int8 quantized data. The NEON path (when enabled)
// consumes as many leading elements as it can in blocks of 8; the scalar
// loop below handles whatever remains. Results are requantized with the
// fixed-point multiplier/shift in |para| and clamped to the activation range.
int ElementSquare(int8_t *input, int8_t *output, int element_size, ArithSelfQuantArg para) {
  const int32_t in_zp = para.in_args_.zp_;
  const int32_t out_zp = para.out_args_.zp_;
  int pos = 0;
#ifdef ENABLE_NEON
  // Advances |pos| past every element the vector path handled.
  SquareInt8NEON(input, output, element_size, para, &pos);
#endif
  for (; pos < element_size; ++pos) {
    // NOTE(review): the zero-point is *added* here (and in the NEON path via
    // LoadAndAddOffset) -- presumably zp_ is stored pre-negated; confirm
    // against the quantization setup.
    const int32_t centered = input[pos] + in_zp;
    // gemmlowp-style requantization of centered^2.
    int32_t requant = RoundingDivideByPOT(
      SaturatingRoundingDoublingHighMul(centered * centered * (1 << para.shift_left_), para.output_multiplier_),
      para.shift_right_);
    requant += out_zp;
    if (requant > para.output_activation_max_) {
      output[pos] = para.output_activation_max_;
    } else if (requant < para.output_activation_min_) {
      output[pos] = para.output_activation_min_;
    } else {
      output[pos] = static_cast<int8_t>(requant);
    }
  }
  return OPCLIB_OK;
}
int
ElementLogicalNot
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
)
{
float
in_scale
=
para
.
in_args_
.
scale_
;
int32_t
in_zp
=
para
.
in_args_
.
zp_
;
float
out_scale
=
para
.
out_args_
.
scale_
;
int32_t
out_zp
=
para
.
out_args_
.
zp_
;
float
bias
=
in_zp
*
in_scale
;
for
(
int
i
=
0
;
i
<
element_size
;
i
++
)
{
int32_t
output_tmp
=
round
(((
float
)(
!
(
bool
)(
input
[
i
]
*
in_scale
+
bias
)))
/
out_scale
)
+
out_zp
;
if
(
output_tmp
>
para
.
output_activation_max_
)
{
output
[
i
]
=
para
.
output_activation_max_
;
}
else
if
(
output_tmp
<
para
.
output_activation_min_
)
{
output
[
i
]
=
para
.
output_activation_min_
;
}
else
{
output
[
i
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
return
OPCLIB_OK
;
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_self_int8.h
浏览文件 @
eaa28ac4
...
...
@@ -27,6 +27,22 @@ int ElementRound(int8_t *input, int8_t *output, int element_size, ArithSelfQuant
int
ElementFloor
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementCeil
(
int8_t
*
input
,
int8_t
*
output
,
int
number
,
ArithSelfQuantArg
para
);
int
ElementCeil
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementAbs
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementSin
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementCos
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementLog
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementSqrt
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementRsqrt
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementSquare
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
int
ElementLogicalNot
(
int8_t
*
input
,
int8_t
*
output
,
int
element_size
,
ArithSelfQuantArg
para
);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_SELF_INT8_H_
mindspore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.cc
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/opclib/int8/split_int8.h"
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
#include <string.h>
#include "src/runtime/kernel/arm/opclib/errorcode.h"
int
DoSplit
(
int8_t
*
in_data
,
int8_t
**
out_data
,
const
int
*
input_shape
,
int
offset
,
int
num_unit
,
SplitParameter
*
param
)
{
if
(
in_data
==
nullptr
||
out_data
==
nullptr
)
{
return
OPCLIB_ERR
;
}
int
num_split
=
param
->
num_split_
;
int
*
split_sizes
=
param
->
split_sizes_
;
int
*
strides
=
param
->
strides_
;
int
split_dim
=
param
->
split_dim_
;
int
in_stride
=
strides
[
split_dim
];
int
stride_per_split
=
in_stride
*
input_shape
[
split_dim
];
int
split_which
=
offset
%
num_split
;
int
split_times
=
offset
/
num_split
;
int8_t
*
src
=
in_data
+
split_times
*
stride_per_split
;
for
(
int
i
=
0
;
i
<
split_which
;
i
++
)
{
src
+=
split_sizes
[
i
]
*
in_stride
;
}
QuantArg
in_quant_arg
=
param
->
quant_arg_
.
in_args_
;
float
in_scale
=
in_quant_arg
.
scale_
;
int32_t
in_zp
=
in_quant_arg
.
zp_
;
QuantArg
*
out_quant_arg
=
param
->
quant_arg_
.
out_args_
;
for
(
int
i
=
offset
;
i
<
offset
+
num_unit
;
i
++
)
{
split_which
=
i
%
num_split
;
split_times
=
i
/
num_split
;
int
copy_size
=
split_sizes
[
split_which
]
*
in_stride
;
int8_t
*
dst
=
out_data
[
split_which
]
+
split_times
*
copy_size
;
float
out_scale
=
out_quant_arg
[
split_which
].
scale_
;
int32_t
out_zp
=
out_quant_arg
[
split_which
].
zp_
;
if
(
in_scale
==
out_scale
&&
in_zp
==
out_zp
)
{
(
void
)
memcpy
(
dst
,
src
,
copy_size
*
sizeof
(
int8_t
));
}
else
{
float
scale
=
in_scale
/
out_scale
;
float
bias
=
-
in_zp
*
scale
;
for
(
int
j
=
0
;
j
<
copy_size
;
j
++
)
{
int32_t
output_tmp
=
round
(
src
[
j
]
*
scale
+
bias
)
+
out_zp
;
if
(
output_tmp
>
param
->
quant_arg_
.
output_activation_max_
)
{
dst
[
j
]
=
param
->
quant_arg_
.
output_activation_max_
;
}
else
if
(
output_tmp
<
param
->
quant_arg_
.
output_activation_min_
)
{
dst
[
j
]
=
param
->
quant_arg_
.
output_activation_min_
;
}
else
{
dst
[
j
]
=
static_cast
<
int8_t
>
(
output_tmp
);
}
}
}
src
+=
copy_size
;
}
return
OPCLIB_OK
;
}
mindspore/lite/src/runtime/kernel/arm/opclib/int8/split_int8.h
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SPLIT_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SPLIT_INT8_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
int
DoSplit
(
int8_t
*
in_data
,
int8_t
**
out_data
,
const
int
*
input_shape
,
int
offset
,
int
num_unit
,
SplitParameter
*
split_param
);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SPLIT_INT8_H_
mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
浏览文件 @
eaa28ac4
...
...
@@ -89,6 +89,16 @@ struct ArithSelfQuantArg {
QuantArg
out_args_
;
int
output_activation_min_
;
int
output_activation_max_
;
int
output_multiplier_
;
int
shift_left_
;
int
shift_right_
;
};
// Quantization parameters for the int8 Split kernel.
struct SplitQuantArg {
  QuantArg in_args_;       // scale/zero-point of the single input tensor
  QuantArg out_args_[20];  // per-output scale/zero-point; 20 matches the
                           // split_sizes_ capacity in SplitParameter, i.e.
                           // at most 20 outputs are supported
  int output_activation_min_;  // lower clamp applied to requantized values
  int output_activation_max_;  // upper clamp applied to requantized values
};
void
QuantizeMultiplier
(
double
double_multiplier
,
int32_t
*
quantized_multiplier
,
int
*
shift
);
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/split.cc
浏览文件 @
eaa28ac4
...
...
@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/opclib/split.h"
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
#include <string.h>
#include "src/runtime/kernel/arm/opclib/errorcode.h"
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/split.h
浏览文件 @
eaa28ac4
...
...
@@ -18,16 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SPLIT_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
struct
SplitParameter
{
OpParameter
op_parameter_
;
int
num_split_
;
int
split_sizes_
[
20
]
=
{
0
};
int
strides_
[
8
];
int
split_dim_
;
int
n_dims_
;
int
split_count_
;
};
#include "src/runtime/kernel/arm/opclib/split_parameter.h"
int
DoSplit
(
float
*
in_data
,
float
**
out_data
,
const
int
*
input_shape
,
int
offset
,
int
num_unit
,
SplitParameter
*
split_param
);
...
...
mindspore/lite/src/runtime/kernel/arm/opclib/split_parameter.h
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SPLIT_PARAMETER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SPLIT_PARAMETER_H_
#include "src/runtime/kernel/arm/opclib/op_base.h"
// Host-side parameter block for the Split operator, shared by the fp32 and
// int8 kernels (only the int8 kernel reads quant_arg_).
struct SplitParameter {
  OpParameter op_parameter_;   // common op header (type, etc.)
  SplitQuantArg quant_arg_;    // quantization info for the int8 path
  int num_split_;              // number of output tensors
  int split_sizes_[20] = {0};  // size of each output along split_dim_
                               // (zero-initialized; max 20 splits)
  int strides_[20];            // per-dimension element strides of the input
  int split_dim_;              // axis the input is split along
  int n_dims_;                 // number of input dimensions
  int split_count_;            // presumably the number of split units across
                               // the non-split axes -- confirm against the
                               // kernel that fills it
};
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SPLIT_PARAMETER_H_
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/arithmetic_self_int8_tests.cc
浏览文件 @
eaa28ac4
此差异已折叠。
点击以展开。
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/split_int8_tests.cc
0 → 100644
浏览文件 @
eaa28ac4
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "mindspore/lite/src/runtime/kernel/arm/opclib/split_parameter.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "mindspore/lite/src/lite_kernel.h"
#include "mindspore/lite/src/ir/tensor.h"
namespace
mindspore
{
// Test fixture for the int8 Split kernel tests below; inherits the shared
// PrintData/CompareOutputData helpers from mindspore::Common.
class TestSplitInt8 : public mindspore::Common {
 public:
  TestSplitInt8() = default;
};
// Splits a 2x3x2 int8 tensor along axis 1 into sizes {1, 2} on 2 threads.
// Input and output quantization are identical (scale 1.0, zp 0), so the
// kernel should take the memcpy fast path and copy values through verbatim.
TEST_F(TestSplitInt8, Split_quant0_thread2) {
  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  std::vector<int> shape1 = {2, 3, 2};
  std::vector<int8_t *> input(1, nullptr);
  input[0] = input1.data();
  const int output1_size = 4;
  int8_t output1[4];
  const int output2_size = 8;
  int8_t output2[8];
  std::vector<int> output1_shape = {2, 1, 2};
  std::vector<int> output2_shape = {2, 2, 2};
  lite::tensor::QuantArg input_quant_arg;
  input_quant_arg.scale = 1.0;
  input_quant_arg.zeroPoint = 0;
  lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.scale = 1.0;
  output_quant_arg.zeroPoint = 0;
  TypeId tid_int8 = kNumberTypeInt8;
  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
  input_tensor1->SetData(input1.data());
  input_tensor1->set_shape(shape1);
  input_tensor1->AddQuantParam(input_quant_arg);
  input_tensor1->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> inputs_tensor(1);
  inputs_tensor[0] = input_tensor1;
  lite::tensor::Tensor *output1_tensor = new lite::tensor::Tensor;
  output1_tensor->SetData(output1);
  output1_tensor->set_shape(output1_shape);
  output1_tensor->AddQuantParam(output_quant_arg);
  output1_tensor->set_data_type(tid_int8);
  lite::tensor::Tensor *output2_tensor = new lite::tensor::Tensor;
  output2_tensor->SetData(output2);
  output2_tensor->set_shape(output2_shape);
  output2_tensor->AddQuantParam(output_quant_arg);
  output2_tensor->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> outputs_tensor(2);
  outputs_tensor[0] = output1_tensor;
  outputs_tensor[1] = output2_tensor;
  SplitParameter op_param;
  op_param.op_parameter_.type_ = schema::PrimitiveType_Split;
  op_param.num_split_ = 2;
  op_param.split_dim_ = 1;
  op_param.split_sizes_[0] = 1;
  op_param.split_sizes_[1] = 2;
  lite::Context *ctx = new lite::Context;
  ctx->thread_num_ = 2;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);
  kernel::LiteKernel *kernel =
    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
  ASSERT_NE(kernel, nullptr);
  auto output1_tensor_shape = output1_tensor->shape();
  auto output2_tensor_shape = output2_tensor->shape();
  ASSERT_EQ(output1_tensor_shape, output1_shape);
  ASSERT_EQ(output2_tensor_shape, output2_shape);
  kernel->Run();
  std::vector<int8_t> except_result1 = {1, 2, 7, 8};
  std::vector<int8_t> except_result2 = {3, 4, 5, 6, 9, 10, 11, 12};
  PrintData("output data", output1, output1_size);
  PrintData("output data shape", output1_tensor_shape.data(), output1_tensor_shape.size());
  PrintData("output data", output2, output2_size);
  PrintData("output data shape", output2_tensor_shape.data(), output2_tensor_shape.size());
  CompareOutputData(output1, except_result1.data(), output1_size, 0.000001);
  CompareOutputData(output2, except_result2.data(), output2_size, 0.000001);
  // Detach the stack/vector-owned buffers so the tensor destructors do not
  // try to free them.
  input_tensor1->SetData(nullptr);
  output1_tensor->SetData(nullptr);
  output2_tensor->SetData(nullptr);
  delete input_tensor1;
  delete output1_tensor;
  delete output2_tensor;
  // Bug fix: the kernel allocated by |creator| was leaked in the original.
  delete kernel;
  delete ctx;
}
// Splits a 2x3x2 int8 tensor along axis 1 into 3 outputs with no explicit
// split sizes (the kernel derives an even split of 1 each) on 2 threads.
// Quantization is identical on both sides, so values copy through verbatim.
TEST_F(TestSplitInt8, Split_quant0_thread2_num) {
  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  std::vector<int> shape1 = {2, 3, 2};
  std::vector<int8_t *> input(1, nullptr);
  input[0] = input1.data();
  const int output1_size = 4;
  int8_t output1[4];
  const int output2_size = 4;
  int8_t output2[4];
  const int output3_size = 4;
  int8_t output3[4];
  std::vector<int> output1_shape = {2, 1, 2};
  std::vector<int> output2_shape = {2, 1, 2};
  std::vector<int> output3_shape = {2, 1, 2};
  lite::tensor::QuantArg input_quant_arg;
  input_quant_arg.scale = 1.0;
  input_quant_arg.zeroPoint = 0;
  lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.scale = 1.0;
  output_quant_arg.zeroPoint = 0;
  TypeId tid_int8 = kNumberTypeInt8;
  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
  input_tensor1->SetData(input1.data());
  input_tensor1->set_shape(shape1);
  input_tensor1->AddQuantParam(input_quant_arg);
  input_tensor1->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> inputs_tensor(1);
  inputs_tensor[0] = input_tensor1;
  lite::tensor::Tensor *output1_tensor = new lite::tensor::Tensor;
  output1_tensor->SetData(output1);
  output1_tensor->set_shape(output1_shape);
  output1_tensor->AddQuantParam(output_quant_arg);
  output1_tensor->set_data_type(tid_int8);
  lite::tensor::Tensor *output2_tensor = new lite::tensor::Tensor;
  output2_tensor->SetData(output2);
  output2_tensor->set_shape(output2_shape);
  output2_tensor->AddQuantParam(output_quant_arg);
  output2_tensor->set_data_type(tid_int8);
  lite::tensor::Tensor *output3_tensor = new lite::tensor::Tensor;
  output3_tensor->SetData(output3);
  output3_tensor->set_shape(output3_shape);
  output3_tensor->AddQuantParam(output_quant_arg);
  output3_tensor->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> outputs_tensor(3);
  outputs_tensor[0] = output1_tensor;
  outputs_tensor[1] = output2_tensor;
  outputs_tensor[2] = output3_tensor;
  SplitParameter op_param;
  op_param.op_parameter_.type_ = schema::PrimitiveType_Split;
  op_param.num_split_ = 3;
  op_param.split_dim_ = 1;
  lite::Context *ctx = new lite::Context;
  ctx->thread_num_ = 2;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);
  kernel::LiteKernel *kernel =
    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
  ASSERT_NE(kernel, nullptr);
  auto output1_tensor_shape = output1_tensor->shape();
  auto output2_tensor_shape = output2_tensor->shape();
  auto output3_tensor_shape = output3_tensor->shape();
  ASSERT_EQ(output1_tensor_shape, output1_shape);
  ASSERT_EQ(output2_tensor_shape, output2_shape);
  ASSERT_EQ(output3_tensor_shape, output3_shape);
  kernel->Run();
  std::vector<int8_t> except_result1 = {1, 2, 7, 8};
  std::vector<int8_t> except_result2 = {3, 4, 9, 10};
  std::vector<int8_t> except_result3 = {5, 6, 11, 12};
  PrintData("output data", output1, output1_size);
  PrintData("output data shape", output1_tensor_shape.data(), output1_tensor_shape.size());
  PrintData("output data", output2, output2_size);
  PrintData("output data shape", output2_tensor_shape.data(), output2_tensor_shape.size());
  PrintData("output data", output3, output3_size);
  PrintData("output data shape", output3_tensor_shape.data(), output3_tensor_shape.size());
  CompareOutputData(output1, except_result1.data(), output1_size, 0.000001);
  CompareOutputData(output2, except_result2.data(), output2_size, 0.000001);
  CompareOutputData(output3, except_result3.data(), output3_size, 0.000001);
  // Detach the stack/vector-owned buffers so the tensor destructors do not
  // try to free them.
  input_tensor1->SetData(nullptr);
  output1_tensor->SetData(nullptr);
  output2_tensor->SetData(nullptr);
  output3_tensor->SetData(nullptr);
  delete input_tensor1;
  delete output1_tensor;
  delete output2_tensor;
  delete output3_tensor;
  // Bug fix: the kernel allocated by |creator| was leaked in the original.
  delete kernel;
  delete ctx;
}
// Splits a 2x3x2 int8 tensor along axis 1 into 3 even outputs where the
// output scale (2.0) differs from the input scale (1.0): the kernel must
// take the requantization path, halving each value (with rounding).
TEST_F(TestSplitInt8, Split_quant1_thread2_num) {
  std::vector<int8_t> input1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  std::vector<int> shape1 = {2, 3, 2};
  std::vector<int8_t *> input(1, nullptr);
  input[0] = input1.data();
  const int output1_size = 4;
  int8_t output1[4];
  const int output2_size = 4;
  int8_t output2[4];
  const int output3_size = 4;
  int8_t output3[4];
  std::vector<int> output1_shape = {2, 1, 2};
  std::vector<int> output2_shape = {2, 1, 2};
  std::vector<int> output3_shape = {2, 1, 2};
  lite::tensor::QuantArg input_quant_arg;
  input_quant_arg.scale = 1.0;
  input_quant_arg.zeroPoint = 0;
  lite::tensor::QuantArg output_quant_arg;
  output_quant_arg.scale = 2.0;
  output_quant_arg.zeroPoint = 0;
  TypeId tid_int8 = kNumberTypeInt8;
  lite::tensor::Tensor *input_tensor1 = new lite::tensor::Tensor;
  input_tensor1->SetData(input1.data());
  input_tensor1->set_shape(shape1);
  input_tensor1->AddQuantParam(input_quant_arg);
  input_tensor1->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> inputs_tensor(1);
  inputs_tensor[0] = input_tensor1;
  lite::tensor::Tensor *output1_tensor = new lite::tensor::Tensor;
  output1_tensor->SetData(output1);
  output1_tensor->set_shape(output1_shape);
  output1_tensor->AddQuantParam(output_quant_arg);
  output1_tensor->set_data_type(tid_int8);
  lite::tensor::Tensor *output2_tensor = new lite::tensor::Tensor;
  output2_tensor->SetData(output2);
  output2_tensor->set_shape(output2_shape);
  output2_tensor->AddQuantParam(output_quant_arg);
  output2_tensor->set_data_type(tid_int8);
  lite::tensor::Tensor *output3_tensor = new lite::tensor::Tensor;
  output3_tensor->SetData(output3);
  output3_tensor->set_shape(output3_shape);
  output3_tensor->AddQuantParam(output_quant_arg);
  output3_tensor->set_data_type(tid_int8);
  std::vector<lite::tensor::Tensor *> outputs_tensor(3);
  outputs_tensor[0] = output1_tensor;
  outputs_tensor[1] = output2_tensor;
  outputs_tensor[2] = output3_tensor;
  SplitParameter op_param;
  op_param.op_parameter_.type_ = schema::PrimitiveType_Split;
  op_param.num_split_ = 3;
  op_param.split_dim_ = 1;
  lite::Context *ctx = new lite::Context;
  ctx->thread_num_ = 2;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Split};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);
  kernel::LiteKernel *kernel =
    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), ctx, desc);
  ASSERT_NE(kernel, nullptr);
  auto output1_tensor_shape = output1_tensor->shape();
  auto output2_tensor_shape = output2_tensor->shape();
  auto output3_tensor_shape = output3_tensor->shape();
  ASSERT_EQ(output1_tensor_shape, output1_shape);
  ASSERT_EQ(output2_tensor_shape, output2_shape);
  ASSERT_EQ(output3_tensor_shape, output3_shape);
  kernel->Run();
  std::vector<int8_t> except_result1 = {1, 1, 4, 4};
  std::vector<int8_t> except_result2 = {2, 2, 5, 5};
  std::vector<int8_t> except_result3 = {3, 3, 6, 6};
  PrintData("output data", output1, output1_size);
  PrintData("output data shape", output1_tensor_shape.data(), output1_tensor_shape.size());
  PrintData("output data", output2, output2_size);
  PrintData("output data shape", output2_tensor_shape.data(), output2_tensor_shape.size());
  PrintData("output data", output3, output3_size);
  PrintData("output data shape", output3_tensor_shape.data(), output3_tensor_shape.size());
  CompareOutputData(output1, except_result1.data(), output1_size, 0.000001);
  CompareOutputData(output2, except_result2.data(), output2_size, 0.000001);
  CompareOutputData(output3, except_result3.data(), output3_size, 0.000001);
  // Detach the stack/vector-owned buffers so the tensor destructors do not
  // try to free them.
  input_tensor1->SetData(nullptr);
  output1_tensor->SetData(nullptr);
  output2_tensor->SetData(nullptr);
  output3_tensor->SetData(nullptr);
  delete input_tensor1;
  delete output1_tensor;
  delete output2_tensor;
  delete output3_tensor;
  // Bug fix: the kernel allocated by |creator| was leaked in the original.
  delete kernel;
  delete ctx;
}
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录