Commit 867afc71
Repository: magicwindyyd / mindspore (forked from MindSpore / mindspore)
Authored on Aug 08, 2020 by mindspore-ci-bot; committed by Gitee on Aug 08, 2020.
!4157 [MS][LITE] fix bug of arm cpu fp32 op: batchnorm, scale, pooling
Merge pull request !4157 from yangruoqi713/lite
Parents: ac851f37, d995debc
Showing 10 changed files with 49 additions and 51 deletions (+49 −51).
mindspore/lite/src/ops/pooling.cc                                      +4  −7
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc                +16 −17
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h                 +3  −7
mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc                +6  −2
mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc                +6  −2
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc                    +2  −1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc          +6  −6
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h           +4  −2
mindspore/lite/src/runtime/thread_pool.cc                              +0  −7
mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc    +2  −0
mindspore/lite/src/ops/pooling.cc

@@ -56,13 +56,11 @@ int Pooling::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   } else {
     auto round_mode = pooling_prim->roundMode();
     if (round_mode == schema::RoundMode_FLOOR) {
-      output_h = std::floor((input_h + pad_u_ + pad_d_ - window_h) / pooling_prim->strideH() + 1);
-      output_w = std::floor((input_w + pad_l_ + pad_r_ - window_w) / pooling_prim->strideW() + 1);
+      output_h = std::floor(static_cast<float>(input_h + pad_u_ + pad_d_ - window_h) / pooling_prim->strideH()) + 1;
+      output_w = std::floor(static_cast<float>(input_w + pad_l_ + pad_r_ - window_w) / pooling_prim->strideW()) + 1;
     } else if (round_mode == schema::RoundMode_CEIL) {
-      output_h = std::ceil((input_h + pooling_prim->padUp() + pooling_prim->padDown() - window_h) / pooling_prim->strideH() + 1);
-      output_w = std::ceil((input_w + pooling_prim->padLeft() + pooling_prim->padRight() - window_w) / pooling_prim->strideW() + 1);
+      output_h = std::ceil(static_cast<float>(input_h + pad_u_ + pad_d_ - window_h) / pooling_prim->strideH()) + 1;
+      output_w = std::ceil(static_cast<float>(input_w + pad_l_ + pad_r_ - window_w) / pooling_prim->strideW()) + 1;
     } else {
       MS_LOG(ERROR) << "unsupported round mode.";
     }
@@ -80,4 +78,3 @@ int Pooling::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   return RET_OK;
 }
-}
 }  // namespace mindspore::lite
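Why the cast matters: input_h + pad_u_ + pad_d_ - window_h and the stride are both integers, so the old expressions performed truncating integer division before std::floor/std::ceil ever saw the value, which made RoundMode_CEIL behave exactly like floor. A minimal standalone sketch, with hypothetical sizes chosen so the division is not exact:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical pooling geometry: 7-high input, 3-high window, stride 2, one pad row.
  int input_h = 7, pad_u = 1, pad_d = 0, window_h = 3, stride_h = 2;

  // Old code: (7 + 1 + 0 - 3) / 2 truncates to 2, so std::ceil(2 + 1) == 3
  // and the CEIL round mode is silently a no-op.
  int old_output_h = std::ceil((input_h + pad_u + pad_d - window_h) / stride_h + 1);

  // Fixed code: cast to float first (5.0f / 2 = 2.5), round up, then add 1.
  int new_output_h = std::ceil(static_cast<float>(input_h + pad_u + pad_d - window_h) / stride_h) + 1;

  std::printf("old=%d new=%d\n", old_output_h, new_output_h);  // old=3 new=4
  return 0;
}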
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc

@@ -28,17 +28,23 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BatchNorm;

 namespace mindspore::kernel {
-int BatchnormCPUKernel::Init() { return RET_OK; }
+int BatchnormCPUKernel::Init() {
+  auto input_shapes = inputs_[0]->shape();
+  auto n_dim = input_shapes.size();
+  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
+  batchnorm_param_->unit_ = 1;
+  for (int i = 0; i < n_dim - 1; i++) {
+    batchnorm_param_->unit_ *= input_shapes[i];
+  }
+  batchnorm_param_->op_parameter_.thread_num_ =
+      MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->unit_);
+  return RET_OK;
+}

 int BatchnormCPUKernel::ReSize() { return RET_OK; }

-int BatchnormCPUKernel::DoExecute(int tid) {
-  int count = MSMIN(thread_unit_, units_ - tid * thread_unit_);
-  if (count <= 0) {
-    return RET_OK;
-  }
-  int offset = tid * thread_unit_ * channel_;
-  BatchNorm(in_addr_ + offset, mean_addr_, var_addr_, count, channel_, batchnorm_param_->epsilon_, out_addr_ + offset);
+int BatchnormCPUKernel::DoExecute(int task_id) {
+  BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_);
   return RET_OK;
 }
@@ -62,15 +68,8 @@ int BatchnormCPUKernel::Run() {
   mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data());
   var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data());
   out_addr_ = reinterpret_cast<float *>(outputs_.at(0)->Data());
-  auto input_shapes = inputs_[0]->shape();
-  channel_ = input_shapes[3];
-  units_ = 1;
-  for (int i = 0; i < 3; i++) {
-    units_ *= input_shapes[i];
-  }
-  thread_count_ = MSMIN(thread_count_, units_);
-  thread_unit_ = UP_DIV(units_, thread_count_);
-  int ret = LiteBackendParallelLaunch(BatchNormRun, this, thread_count_);
+  int ret = LiteBackendParallelLaunch(BatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
     return ret;
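The setup that Run() used to redo on every call now happens once in Init(), and it no longer assumes a 4-D NHWC tensor: channel comes from the last axis and the unit count is the product of all leading axes. Clamping thread_num_ to unit_ in the same place keeps workers whose task_id would exceed the unit count from being launched at all. A sketch of the shape handling, with a hypothetical shape:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical NHWC shape; any rank >= 1 works, unlike the old Run()
  // code which hard-coded input_shapes[3].
  std::vector<int> input_shapes = {2, 8, 8, 16};
  int n_dim = static_cast<int>(input_shapes.size());
  int channel = input_shapes[n_dim - 1];  // channel is the last axis
  int unit = 1;
  for (int i = 0; i < n_dim - 1; i++) unit *= input_shapes[i];  // product of leading axes
  std::printf("unit=%d channel=%d\n", unit, channel);  // unit=128 channel=16
  return 0;
}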
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h

@@ -30,10 +30,11 @@ class BatchnormCPUKernel : public LiteKernel {
   BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                      const lite::Primitive *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    opParameter->thread_num_ = ctx->thread_num_;
     batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
   }
-  ~BatchnormCPUKernel() override { delete batchnorm_param_; }
+  ~BatchnormCPUKernel() override = default;

   int Init() override;
   int ReSize() override;
@@ -41,15 +42,10 @@ class BatchnormCPUKernel : public LiteKernel {
   int DoExecute(int tid);

  private:
-  int thread_count_;
-  int thread_unit_;
-  int units_;
-  int channel_;
   float *in_addr_;
   float *mean_addr_;
   float *var_addr_;
   float *out_addr_;
-  const Context *ctx_;
   BatchNormParameter *batchnorm_param_;
 };
 }  // namespace mindspore::kernel
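The destructor change reads like an ownership fix: batchnorm_param_ is only a reinterpret_cast view of the parameter passed to the constructor, which the kernel does not own, so deleting it presumably risked a double free once the actual owner released the parameter. A small sketch of the aliasing, using simplified stand-in structs:

// Simplified stand-ins for the real parameter structs.
struct OpParameter { int thread_num_; };
struct BatchNormParameter { OpParameter op_parameter_; float epsilon_; };

int main() {
  // The framework allocates the parameter block and keeps ownership.
  auto *parameter = reinterpret_cast<OpParameter *>(new BatchNormParameter{});
  // The kernel stores a typed view of the same allocation (as the ctor does);
  // deleting the view AND having the owner free it would be a double free,
  // hence ~BatchnormCPUKernel() is now `= default`.
  auto *batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
  (void)batchnorm_param_;
  delete reinterpret_cast<BatchNormParameter *>(parameter);  // single delete, by the owner
  return 0;
}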
mindspore/lite/src/runtime/kernel/arm/fp32/nchw2nhwc.cc

@@ -36,8 +36,12 @@ int Nchw2NhwcCPUKernel::Run() {
   auto input = inputs_[0];
   auto output = outputs_[0];

-  PackNCHWToNHWCFp32(input->Data(), output->Data(), output->Batch(), output->Height() * output->Width(),
-                     output->Channel());
+  if (input->shape().size() == 4) {
+    PackNCHWToNHWCFp32(input->Data(), output->Data(), output->Batch(), output->Height() * output->Width(),
+                       output->Channel());
+  } else {
+    memcpy(output->Data(), input->Data(), input->ElementsNum() * sizeof(float));
+  }
   return RET_OK;
 }
mindspore/lite/src/runtime/kernel/arm/fp32/nhwc2nchw.cc

@@ -36,8 +36,12 @@ int Nhwc2NchwCPUKernel::Run() {
   auto input = inputs_[0];
   auto output = outputs_[0];

-  PackNHWCToNCHWFp32(input->Data(), output->Data(), output->Batch(), output->Height() * output->Width(),
-                     output->Channel());
+  if (input->shape().size() == 4) {
+    PackNHWCToNCHWFp32(input->Data(), output->Data(), output->Batch(), output->Height() * output->Width(),
+                       output->Channel());
+  } else {
+    memcpy(output->Data(), input->Data(), input->ElementsNum() * sizeof(float));
+  }
   return RET_OK;
 }
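Both transpose kernels above receive the same guard: NCHW<->NHWC packing is only meaningful for 4-D tensors, so lower-rank inputs are now copied through unchanged instead of being pushed through the packer with meaningless geometry. A standalone sketch of the pattern, with plain buffers standing in for the Lite tensor API:

#include <cstdio>
#include <cstring>

// Transpose only when rank is 4; otherwise the two layouts coincide and a
// raw element-wise copy is the correct (and cheaper) behavior.
void RunLayoutKernel(const float *in, float *out, int rank, int elements) {
  if (rank == 4) {
    // ... call the packing routine (PackNCHWToNHWCFp32 / PackNHWCToNCHWFp32) ...
  } else {
    std::memcpy(out, in, elements * sizeof(float));
  }
}

int main() {
  float in[6] = {1, 2, 3, 4, 5, 6}, out[6] = {};
  RunLayoutKernel(in, out, /*rank=*/2, /*elements=*/6);  // 2-D input: memcpy path
  std::printf("%g %g\n", out[0], out[5]);                // prints: 1 6
  return 0;
}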
mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc

@@ -45,12 +45,13 @@ int ScaleCPUKernel::InitScaleOffset() {
   }
   if (inputs_.size() == 3) {
-    auto offset_tensor = inputs_.at(1);
+    auto offset_tensor = inputs_.at(2);
     offset_ = reinterpret_cast<float *>(malloc(offset_tensor->ElementsNum() * sizeof(float)));
     if (offset_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
     memcpy(offset_, offset_tensor->Data(), offset_tensor->ElementsNum() * sizeof(float));
+    param->has_offset_ = true;
   } else {
     offset_ = nullptr;
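The Scale fix is an indexing bug: in the three-input form the convention is inputs_[0] = data, inputs_[1] = scale, inputs_[2] = offset, so reading the offset from .at(1) silently copied the scale tensor into the offset buffer. A tiny sketch of the corrected indexing, with a hypothetical stand-in for the tensor type:

#include <cassert>
#include <vector>

struct Tensor { const char *name; };  // stand-in for lite::tensor::Tensor

int main() {
  Tensor data{"data"}, scale{"scale"}, offset{"offset"};
  std::vector<Tensor *> inputs_ = {&data, &scale, &offset};
  if (inputs_.size() == 3) {
    Tensor *offset_tensor = inputs_.at(2);  // was .at(1), i.e. the scale tensor
    assert(offset_tensor == &offset);
  }
  return 0;
}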
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc

@@ -16,12 +16,12 @@
 #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"

-void BatchNorm(const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int units, int channel,
-               float epsilon, float *output_ptr) {
-  for (int u = 0; u < units; u++) {
-    for (int c = 0; c < channel; c++) {
-      auto variance_sqrt = sqrt(variance_ptr[c] + epsilon);
-      output_ptr[u * channel + c] = (input_ptr[u * channel + c] - mean_ptr[c]) / variance_sqrt;
+void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr,
+               int task_id, BatchNormParameter *param) {
+  for (int u = task_id; u < param->unit_; u += param->op_parameter_.thread_num_) {
+    for (int c = 0; c < param->channel_; c++) {
+      auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
+      output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt;
     }
   }
 }
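The rewritten kernel also changes how work is split: instead of giving each thread one contiguous block (computed in Run() via UP_DIV and per-thread offsets), worker task_id now strides through the units, handling task_id, task_id + thread_num, task_id + 2 * thread_num, and so on, so every unit is covered exactly once with no count/offset bookkeeping. A standalone sketch of that schedule, using plain std::thread in place of LiteBackendParallelLaunch:

#include <cstdio>
#include <thread>
#include <vector>

// Worker `task_id` of `thread_num` visits units task_id, task_id + thread_num, ...
void Worker(int task_id, int thread_num, int unit_count, std::vector<int> *owner) {
  for (int u = task_id; u < unit_count; u += thread_num) {
    (*owner)[u] = task_id;  // record which worker touched unit u
  }
}

int main() {
  const int thread_num = 3, unit_count = 10;  // hypothetical sizes
  std::vector<int> owner(unit_count, -1);
  std::vector<std::thread> pool;
  for (int t = 0; t < thread_num; t++) pool.emplace_back(Worker, t, thread_num, unit_count, &owner);
  for (auto &th : pool) th.join();
  // Every unit is owned by exactly one worker: 0,1,2,0,1,2,...
  for (int u = 0; u < unit_count; u++) std::printf("unit %d -> worker %d\n", u, owner[u]);
  return 0;
}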
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h

@@ -22,9 +22,11 @@
 struct BatchNormParameter {
   OpParameter op_parameter_;
   float epsilon_;
+  int unit_;
+  int channel_;
 };

-void BatchNorm(const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int count, int channel,
-               float epsilon, float *output_ptr);
+void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr,
+               int task_id, BatchNormParameter *param);

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_
mindspore/lite/src/runtime/thread_pool.cc

@@ -245,8 +245,6 @@ bool ThreadPool::SetThreadPool() {
   } else {
     AddRunThread(localMaxThreadNums);
   }
-  MS_LOG(DEBUG) << "configThreadNums=" << configThreadNums << ", curThreadNums=" << curThreadNums
-                << ", curThreadRunNums=" << curThreadRunNums << ", localMaxThreadNums=" << localMaxThreadNums;
   return true;
 }
@@ -276,7 +274,6 @@ void ThreadPool::AddNewThread(int newNums) {
   }
   curThreadNums += newNums;
   curThreadRunNums += newNums;
-  MS_LOG(DEBUG) << "add " << newNums << " thread";
 }

 bool ThreadPool::SetThreadCpuBind(bool ifBind, int mode, bool master) {
@@ -330,7 +327,6 @@ bool ThreadPool::AddTask(WorkFun &&worker, void *cdata, int numTask) {
 }

 bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) {
-  MS_LOG(DEBUG) << "numTask = " << numTask << ", curThreadRunNums = " << curThreadRunNums;
   auto taskOri = *task;
   if (numTask > curThreadRunNums) {
     task->first = [taskOri, numTask, this](int task_id, TvmEnv *penv, void *cdata) -> int {
@@ -370,12 +366,10 @@ bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) {
       }
     }
   }
-  MS_LOG(DEBUG) << "finish " << numTask << " task successful";
   return CheckResult();
 }

 void ThreadPool::AddRunThread(int num) {
-  MS_LOG(DEBUG) << "num=" << num << ", curThreadRunNums=" << curThreadRunNums;
   int activeNums = num - curThreadRunNums;
   if (activeNums <= 0 || activateList.size() < activeNums) {
     return;
@@ -389,7 +383,6 @@ void ThreadPool::AddRunThread(int num) {
 }

 void ThreadPool::SubRunThread(int num) {
-  MS_LOG(DEBUG) << "num=" << num << ", curThreadRunNums=" << curThreadRunNums;
   int deactiveNums = curThreadRunNums - num;
   if (deactiveNums <= 0) {
     return;
mindspore/lite/tools/converter/parser/caffe/caffe_pooling_parser.cc

@@ -56,6 +56,8 @@ STATUS CaffePoolingParser::Parse(const caffe::LayerParameter &proto,
     return RET_ERROR;
   }

+  // default roundMode RoundMode_CEIL
+  attr->roundMode = schema::RoundMode_CEIL;
   if (poolingParam.has_round_mode()) {
     if (poolingParam.round_mode() == caffe::PoolingParameter_RoundMode_FLOOR) {
       attr->roundMode = schema::RoundMode_FLOOR;
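Context for the two added lines: Caffe pools with ceiling rounding by default, and older prototxts may omit round_mode entirely, so the parser now seeds attr->roundMode with RoundMode_CEIL before the has_round_mode() check and only overrides it when the field is explicitly set. A sketch of that default-then-override pattern, with hypothetical stand-ins for the proto accessors:

#include <cstdio>

enum RoundMode { RoundMode_FLOOR = 0, RoundMode_CEIL = 1 };  // stand-in for the schema enum

RoundMode ParseRoundMode(bool has_round_mode, RoundMode proto_value) {
  RoundMode mode = RoundMode_CEIL;  // Caffe's default pooling rounding
  if (has_round_mode) {
    mode = proto_value;  // an explicit prototxt setting wins
  }
  return mode;
}

int main() {
  std::printf("%d\n", ParseRoundMode(false, RoundMode_FLOOR));  // 1: field absent -> CEIL
  std::printf("%d\n", ParseRoundMode(true, RoundMode_FLOOR));   // 0: explicit FLOOR
  return 0;
}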