Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
3785aab8
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3785aab8
编写于
3月 28, 2020
作者:
J
jackzhang235
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support mlu set_input_layout
上级
12a1a095
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
81 addition
and
35 deletion
+81
-35
lite/api/paddle_api.cc
lite/api/paddle_api.cc
+6
-7
lite/api/paddle_api.h
lite/api/paddle_api.h
+6
-7
lite/api/python/pybind/pybind.cc
lite/api/python/pybind/pybind.cc
+6
-5
lite/core/device_info.cc
lite/core/device_info.cc
+2
-1
lite/core/device_info.h
lite/core/device_info.h
+1
-1
lite/core/mir/mlu_postprocess_pass.cc
lite/core/mir/mlu_postprocess_pass.cc
+17
-8
lite/core/mir/mlu_postprocess_pass.h
lite/core/mir/mlu_postprocess_pass.h
+1
-1
lite/kernels/mlu/bridges/utility.h
lite/kernels/mlu/bridges/utility.h
+1
-1
lite/kernels/mlu/layout_compute.cc
lite/kernels/mlu/layout_compute.cc
+17
-0
lite/kernels/mlu/layout_compute.h
lite/kernels/mlu/layout_compute.h
+24
-4
未找到文件。
lite/api/paddle_api.cc
浏览文件 @
3785aab8
...
...
@@ -203,23 +203,22 @@ void ConfigBase::set_threads(int threads) {
#endif
}
void
CxxConfig
::
mlu_set_mlu_core_version
(
lite_api
::
MLUCoreVersion
core_version
)
{
void
CxxConfig
::
set_mlu_core_version
(
lite_api
::
MLUCoreVersion
core_version
)
{
mlu_core_version_
=
core_version
;
}
void
CxxConfig
::
mlu_
set_mlu_core_number
(
int
core_number
)
{
void
CxxConfig
::
set_mlu_core_number
(
int
core_number
)
{
mlu_core_number_
=
core_number
;
}
void
CxxConfig
::
mlu_set_input_layout
()
(
DataLayoutType
layout
)
{
void
CxxConfig
::
set_mlu_input_layout
(
DataLayoutType
layout
)
{
mlu_input_layout_
=
layout
;
}
void
CxxConfig
::
mlu_set
_use_first_conv
(
bool
use_first_conv
)
{
void
CxxConfig
::
set_mlu
_use_first_conv
(
bool
use_first_conv
)
{
mlu_use_first_conv_
=
use_first_conv
;
}
void
CxxConfig
::
mlu_set
_first_conv_mean
(
const
std
::
vector
<
float
>
&
mean
)
{
void
CxxConfig
::
set_mlu
_first_conv_mean
(
const
std
::
vector
<
float
>
&
mean
)
{
mlu_first_conv_mean_
=
mean
;
}
void
CxxConfig
::
mlu_set
_first_conv_std
(
const
std
::
vector
<
float
>
&
std
)
{
void
CxxConfig
::
set_mlu
_first_conv_std
(
const
std
::
vector
<
float
>
&
std
)
{
mlu_first_conv_std_
=
std
;
}
lite_api
::
MLUCoreVersion
CxxConfig
::
mlu_core_version
()
const
{
...
...
lite/api/paddle_api.h
浏览文件 @
3785aab8
...
...
@@ -136,7 +136,6 @@ class LITE_API CxxConfig : public ConfigBase {
#ifdef LITE_WITH_X86
int
x86_math_library_math_threads_
=
1
;
#endif
lite_api
::
MLUCoreVersion
mlu_core_version_
{
lite_api
::
MLUCoreVersion
::
MLU_270
};
int
mlu_core_number_
{
1
};
DataLayoutType
mlu_input_layout_
{
DATALAYOUT
(
kNCHW
)};
...
...
@@ -171,12 +170,12 @@ class LITE_API CxxConfig : public ConfigBase {
}
#endif
void
mlu_
set_mlu_core_version
(
lite_api
::
MLUCoreVersion
core_version
);
void
mlu_
set_mlu_core_number
(
int
core_number
);
void
mlu_set_input_layout
()
(
DataLayoutType
layout
);
void
mlu_set
_use_first_conv
(
bool
use_first_conv
);
void
mlu_set
_first_conv_mean
(
const
std
::
vector
<
float
>&
mean
);
void
mlu_set
_first_conv_std
(
const
std
::
vector
<
float
>&
std
);
void
set_mlu_core_version
(
lite_api
::
MLUCoreVersion
core_version
);
void
set_mlu_core_number
(
int
core_number
);
void
set_mlu_input_layout
(
DataLayoutType
layout
);
void
set_mlu
_use_first_conv
(
bool
use_first_conv
);
void
set_mlu
_first_conv_mean
(
const
std
::
vector
<
float
>&
mean
);
void
set_mlu
_first_conv_std
(
const
std
::
vector
<
float
>&
std
);
lite_api
::
MLUCoreVersion
mlu_core_version
()
const
;
int
mlu_core_number
()
const
;
...
...
lite/api/python/pybind/pybind.cc
浏览文件 @
3785aab8
...
...
@@ -128,11 +128,12 @@ void BindLiteCxxConfig(py::module *m) {
.
def
(
"power_mode"
,
&
CxxConfig
::
power_mode
);
#endif
#ifdef LITE_WITH_MLU
cxx_config
.
def
(
"set_use_firstconv"
,
&
CxxConfig
::
set_use_firstconv
)
.
def
(
"set_mean"
,
&
CxxConfig
::
set_mean
)
.
def
(
"set_std"
,
&
CxxConfig
::
set_std
)
.
def
(
"set_mlu_core_version"
,
&
CxxConfig
::
set_mlu_core_version
)
.
def
(
"set_mlu_core_number"
,
&
CxxConfig
::
set_mlu_core_number
);
cxx_config
.
def
(
"set_mlu_core_version"
,
&
CxxConfig
::
set_mlu_core_version
)
.
def
(
"set_mlu_core_number"
,
&
CxxConfig
::
set_mlu_core_number
)
.
def
(
"set_mlu_input_layout"
,
&
CxxConfig
::
set_mlu_input_layout
)
.
def
(
"set_mlu_use_first_conv"
,
&
CxxConfig
::
set_mlu_use_first_conv
)
.
def
(
"set_mlu_first_conv_mean"
,
&
CxxConfig
::
set_mlu_first_conv_mean
)
.
def
(
"set_mlu_first_conv_std"
,
&
CxxConfig
::
set_mlu_first_conv_std
);
#endif
}
...
...
lite/core/device_info.cc
浏览文件 @
3785aab8
...
...
@@ -72,6 +72,7 @@ thread_local int DeviceInfo::mlu_core_number_{1};
thread_local
bool
DeviceInfo
::
use_first_conv_
{
false
};
thread_local
std
::
vector
<
float
>
DeviceInfo
::
mean_vec_
;
thread_local
std
::
vector
<
float
>
DeviceInfo
::
std_vec_
;
thread_local
DataLayoutType
DeviceInfo
::
input_layout_
{
DATALAYOUT
(
kNCHW
)};
#endif
#ifdef TARGET_IOS
...
...
@@ -1123,7 +1124,7 @@ const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; }
const
std
::
vector
<
float
>&
DeviceInfo
::
StdVec
()
const
{
return
std_vec_
;
}
const
DataLayoutType
InputLayout
()
const
{
return
input_layout_
;
}
DataLayoutType
DeviceInfo
::
InputLayout
()
const
{
return
input_layout_
;
}
#endif // LITE_WITH_MLU
...
...
lite/core/device_info.h
浏览文件 @
3785aab8
...
...
@@ -67,7 +67,7 @@ class DeviceInfo {
bool
UseFirstConv
();
const
std
::
vector
<
float
>&
MeanVec
()
const
;
const
std
::
vector
<
float
>&
StdVec
()
const
;
const
DataLayoutType
InputLayout
()
const
;
DataLayoutType
InputLayout
()
const
;
#endif
void
SetCache
(
int
l1size
,
int
l2size
,
int
l3size
);
void
SetArch
(
ARMArch
arch
)
{
arch_
=
arch
;
}
...
...
lite/core/mir/mlu_postprocess_pass.cc
浏览文件 @
3785aab8
...
...
@@ -74,7 +74,9 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
const
Type
*
in_arg_ty
=
kernel
->
GetInputDeclType
(
"Input"
);
const
Type
*
out_arg_ty
=
kernel
->
GetOutputDeclType
(
"Out"
);
if
(
DataLayoutCompatible
(
*
in_arg_ty
,
*
cur_node
->
AsArg
().
type
)
&&
DataLayoutCompatible
(
*
out_arg_ty
,
*
cast_type
))
{
DataLayoutCompatible
(
*
out_arg_ty
,
*
cast_type
)
&&
// for first conv
PrecisionCompatibleTo
(
*
in_arg_ty
,
*
cur_node
->
AsArg
().
type
))
{
is_found
=
true
;
}
}
else
if
(
op_type
==
"io_copy"
)
{
...
...
@@ -121,7 +123,7 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
cast_arg
->
AsArg
().
type
=
cast_type
;
auto
*
var
=
inst_node
->
AsStmt
().
op
()
->
scope
()
->
Var
(
cast_arg_name
);
// for CastAfter manully set the tensor's type
var
->
GetMutable
<
::
paddle
::
lite
::
Tensor
>
();
var
->
GetMutable
<
paddle
::
lite
::
Tensor
>
();
// create the stmt node
auto
*
cast_inst
=
graph
->
NewInstructNode
();
...
...
@@ -281,7 +283,7 @@ void MLUPostprocessPass::GetSubgraphOpArgType(Node* inst_node,
// get subgraph's valid precision
const
auto
&
places
=
graph
->
valid_places
();
std
::
set
<
::
paddle
::
lite_api
::
PrecisionType
>
prec_set
;
std
::
set
<
paddle
::
lite_api
::
PrecisionType
>
prec_set
;
for
(
const
auto
&
place
:
places
)
{
if
(
place
.
target
==
TARGET
(
kMLU
))
{
prec_set
.
insert
(
place
.
precision
);
...
...
@@ -474,13 +476,20 @@ bool MLUPostprocessPass::IsFirstConvNode(Node* arg_node) {
return
false
;
}
void
MLUPostprocessPass
::
GatherFirstConvNodes
(
SSAGraph
*
graph
)
{
void
MLUPostprocessPass
::
Gather
AndModify
FirstConvNodes
(
SSAGraph
*
graph
)
{
for
(
auto
&
node
:
graph
->
mutable_nodes
())
{
if
(
!
node
.
IsStmt
())
continue
;
if
(
node
.
AsStmt
().
op_type
()
==
"feed"
)
{
for
(
auto
&
out
:
node
.
outlinks
)
{
if
(
IsFirstConvNode
(
out
))
{
first_conv_nodes_
.
insert
(
out
->
AsArg
().
name
);
// modify first conv nodes' type
const
auto
*
old_type
=
out
->
AsArg
().
type
;
out
->
AsArg
().
type
=
LiteType
::
GetTensorTy
(
old_type
->
target
(),
paddle
::
lite_api
::
PrecisionType
::
kInt8
,
old_type
->
layout
(),
old_type
->
device
());
}
}
}
...
...
@@ -504,7 +513,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
out
->
AsArg
().
type
=
LiteType
::
GetTensorTy
(
old_type
->
target
(),
old_type
->
precision
(),
::
paddle
::
lite_api
::
DataLayoutType
::
kNHWC
,
paddle
::
lite_api
::
DataLayoutType
::
kNHWC
,
old_type
->
device
());
}
}
...
...
@@ -523,7 +532,7 @@ void MLUPostprocessPass::ModifyLayout(SSAGraph* graph) {
inp
->
AsArg
().
type
=
LiteType
::
GetTensorTy
(
old_type
->
target
(),
old_type
->
precision
(),
::
paddle
::
lite_api
::
DataLayoutType
::
kNHWC
,
paddle
::
lite_api
::
DataLayoutType
::
kNHWC
,
old_type
->
device
());
}
}
...
...
@@ -539,12 +548,12 @@ void MLUPostprocessPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
// 1: feed->arg_in->subgraph->... 2: ...->subgraph->arg_out->fetch;
// arg_in and arg_out are assumed to be NHWC which user should be aware of.
// Thus here we change these args' layout to NHWC
if
(
lite
::
DeviceInfo
::
Global
().
InputLayout
()
==
DATALAYOUT
(
kNHWC
)
{
if
(
lite
::
DeviceInfo
::
Global
().
InputLayout
()
==
DATALAYOUT
(
kNHWC
)
)
{
ModifyLayout
(
graph
.
get
());
}
if
(
lite
::
DeviceInfo
::
Global
().
UseFirstConv
())
{
GatherFirstConvNodes
(
graph
.
get
());
Gather
AndModify
FirstConvNodes
(
graph
.
get
());
}
// insert io_copy, layout and precision cast of subgraph's inputs and outputs
...
...
lite/core/mir/mlu_postprocess_pass.h
浏览文件 @
3785aab8
...
...
@@ -109,7 +109,7 @@ class MLUPostprocessPass : public ProgramPass {
void
RecreateOp
(
Node
*
inst_node
,
SSAGraph
*
graph
);
void
GatherFirstConvNodes
(
SSAGraph
*
graph
);
void
Gather
AndModify
FirstConvNodes
(
SSAGraph
*
graph
);
bool
IsFirstConvNode
(
Node
*
arg_node
);
...
...
lite/kernels/mlu/bridges/utility.h
浏览文件 @
3785aab8
...
...
@@ -84,7 +84,7 @@ struct FPTypeTraits<paddle::lite_api::PrecisionType::kFloat> {
template
<
>
struct
FPTypeTraits
<
paddle
::
lite_api
::
PrecisionType
::
kFP16
>
{
typedef
::
paddle
::
lite
::
fluid
::
float16
T
;
typedef
paddle
::
lite
::
fluid
::
float16
T
;
};
}
// namespace mlu
...
...
lite/kernels/mlu/layout_compute.cc
浏览文件 @
3785aab8
...
...
@@ -89,3 +89,20 @@ REGISTER_LITE_KERNEL(
PRECISION
(
kFloat
),
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
layout
,
kMLU
,
kInt8
,
kNHWC
,
paddle
::
lite
::
kernels
::
mlu
::
LayoutNchwToNhwcCompute
<
PRECISION
(
kInt8
)
>
,
def_layout_nchw2nhwc_fp32_int8
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNCHW
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
lite/kernels/mlu/layout_compute.h
浏览文件 @
3785aab8
...
...
@@ -29,6 +29,24 @@ namespace lite {
namespace
kernels
{
namespace
mlu
{
template
<
paddle
::
lite_api
::
PrecisionType
>
struct
FPTypeTraits
{};
template
<
>
struct
FPTypeTraits
<
paddle
::
lite_api
::
PrecisionType
::
kFloat
>
{
typedef
float
T
;
};
template
<
>
struct
FPTypeTraits
<
paddle
::
lite_api
::
PrecisionType
::
kFP16
>
{
typedef
paddle
::
lite
::
fluid
::
float16
T
;
};
template
<
>
struct
FPTypeTraits
<
paddle
::
lite_api
::
PrecisionType
::
kInt8
>
{
typedef
int8_t
T
;
};
template
<
lite
::
TargetType
Target
,
typename
T
>
inline
void
LayoutTransCompute
(
const
int
dim
,
const
lite
::
Context
<
Target
>&
context
,
...
...
@@ -63,7 +81,7 @@ class LayoutNchwToNhwcCompute
auto
&
param
=
this
->
template
Param
<
param_t
>();
auto
*
x
=
param
.
x
;
auto
*
out
=
param
.
y
;
out
->
template
mutable_data
<
float
>();
out
->
template
mutable_data
<
typename
FPTypeTraits
<
Precision
>
::
T
>
();
auto
x_dims
=
param
.
x
->
dims
().
size
();
auto
&
context
=
this
->
ctx_
->
template
As
<
X86Context
>();
...
...
@@ -88,7 +106,8 @@ class LayoutNchwToNhwcCompute
CHECK
(
0
)
<<
"Unsupport dim in mlu layout nchw to nhwc"
;
}
LayoutTransCompute
<
lite
::
TargetType
::
kX86
,
float
>
(
LayoutTransCompute
<
lite
::
TargetType
::
kX86
,
typename
FPTypeTraits
<
Precision
>::
T
>
(
x_dims
,
context
,
*
x
,
out
,
axis
);
if
(
x_dims
>
2
)
{
...
...
@@ -111,7 +130,7 @@ class LayoutNhwcToNchwCompute
auto
&
param
=
this
->
template
Param
<
param_t
>();
auto
*
x
=
param
.
x
;
auto
*
out
=
param
.
y
;
out
->
template
mutable_data
<
float
>();
out
->
template
mutable_data
<
typename
FPTypeTraits
<
Precision
>
::
T
>
();
auto
x_dims
=
param
.
x
->
dims
().
size
();
auto
&
context
=
this
->
ctx_
->
template
As
<
X86Context
>();
...
...
@@ -136,7 +155,8 @@ class LayoutNhwcToNchwCompute
CHECK
(
0
)
<<
"Unsupport dim in mlu layout nhwc to nchw"
;
}
LayoutTransCompute
<
lite
::
TargetType
::
kX86
,
float
>
(
LayoutTransCompute
<
lite
::
TargetType
::
kX86
,
typename
FPTypeTraits
<
Precision
>::
T
>
(
x_dims
,
context
,
*
x
,
out
,
axis
);
if
(
x_dims
>
2
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录